#ifndef FORTRAN_GRAMMAR_H_ #define FORTRAN_GRAMMAR_H_ // Top-level grammar specification for Fortran. These parsers drive // tokenizing and raw character parsers (cooked-tokens.h, cooked-chars.h) // to recognize the productions of Fortran and to construct a parse tree. // See parser-combinators.txt for documentation on the parser combinator // library used here to implement an LL recursive descent recognizer. #include "basic-parsers.h" #include "cooked-chars.h" #include "cooked-tokens.h" #include "format-specification.h" #include "parse-tree.h" #include "user-state.h" #include #include #include #include #include #include #include #include #include namespace Fortran { // The productions that follow are derived from the draft Fortran 2018 // standard, with some necessary modifications to remove left recursion // and some generalization in order to defer cases where parses depend // on the definitions of symbols. The "Rxxx" numbers that appear in // comments refer to these numbered requirements in the Fortran standard. // Many parsers in this grammar are defined as instances of this Parser<> // template class. This usage requires that their Parse() member functions // be defined separately, typically with a parsing expression wrapped up // in an TYPE_PARSER() macro call. template struct Parser { using resultType = A; constexpr Parser() {} static inline std::optional Parse(ParseState *); }; #define TYPE_PARSER(pexpr) \ template<> \ inline std::optional \ Parser::Parse(ParseState *state) { \ return (pexpr).Parse(state); \ } #define TYPE_CONTEXT_PARSER(contextString, pexpr) \ template<> \ inline std::optional \ Parser::Parse(ParseState *state) { \ return inContext((contextString), (pexpr)).Parse(state); \ } // Some specializations of Parser<> are used multiple times, or are // of some special importance, so we instantiate them once here and // give them names rather than referencing them as anonymous Parser{} // objects in the right-hand sides of productions. constexpr Parser program; // R501 - the "top level" production constexpr Parser specificationPart; // R504 constexpr Parser implicitPart; // R505 constexpr Parser declarationConstruct; // R507 constexpr Parser specificationConstruct; // R508 constexpr Parser executionPartConstruct; // R510 constexpr Parser internalSubprogramPart; // R511 constexpr Parser name; // R603 constexpr Parser literalConstant; // R605 constexpr Parser namedConstant; // R606 constexpr Parser typeParamValue; // R701 constexpr Parser typeSpec; // R702 constexpr Parser declarationTypeSpec; // R703 constexpr Parser intrinsicTypeSpec; // R704 constexpr Parser integerTypeSpec; // R705 constexpr Parser kindSelector; // R706 constexpr Parser signedIntLiteralConstant; // R707 constexpr Parser intLiteralConstant; // R708 constexpr Parser kindParam; // R709 constexpr Parser realLiteralConstant; // R714 constexpr Parser exponentPart; // R717 constexpr Parser charLength; // R723 constexpr Parser charLiteralConstant; // R724 constexpr Parser initialization; // R743 & R805 constexpr Parser derivedTypeSpec; // R754 constexpr Parser typeDeclarationStmt; // R801 constexpr Parser nullInit; // R806 constexpr Parser accessSpec; // R807 constexpr Parser languageBindingSpec; // R808, R1528 constexpr Parser entityDecl; // R803 constexpr Parser coarraySpec; // R809 constexpr Parser arraySpec; // R815 constexpr Parser explicitShapeSpec; // R816 constexpr Parser deferredShapeSpecList; // R820 constexpr Parser assumedImpliedSpec; // R821 constexpr Parser intentSpec; // R826 constexpr Parser dataStmt; // R837 constexpr Parser dataImpliedDo; // R840 constexpr Parser parameterStmt; // R851 constexpr Parser designator; // R901 constexpr Parser variable; // R902 constexpr Parser substring; // R908 constexpr Parser dataReference; // R911, R914, R917 constexpr Parser structureComponent; // R913 constexpr Parser statVariable; // R929 constexpr Parser statOrErrmsg; // R942 & R1165 constexpr Parser definedOpName; // R1003, R1023, R1414, & R1415 constexpr Parser expr; // R1022 constexpr Parser specificationExpr; // R1028 constexpr Parser assignmentStmt; // R1032 constexpr Parser pointerAssignmentStmt; // R1033 constexpr Parser whereStmt; // R1041, R1045, R1046 constexpr Parser whereConstruct; // R1042 constexpr Parser whereBodyConstruct; // R1044 constexpr Parser forallConstruct; // R1050 constexpr Parser forallAssignmentStmt; // R1053 constexpr Parser forallStmt; // R1055 constexpr Parser selector; // R1105 constexpr Parser endSelectStmt; // R1143 & R1151 & R1155 constexpr Parser loopControl; // R1123 constexpr Parser concurrentHeader; // R1125 constexpr Parser endDoStmt; // R1132 constexpr Parser ioUnit; // R1201, R1203 constexpr Parser fileUnitNumber; // R1202 constexpr Parser ioControlSpec; // R1213, R1214 constexpr Parser format; // R1215 constexpr Parser inputItem; // R1216 constexpr Parser outputItem; // R1217 constexpr Parser inputImpliedDo; // R1218, R1219 constexpr Parser outputImpliedDo; // R1218, R1219 constexpr Parser positionOrFlushSpec; // R1227 & R1229 constexpr Parser formatStmt; // R1301 constexpr Parser endProgramStmt; // R1403 constexpr Parser interfaceBlock; // R1501 constexpr Parser genericSpec; // R1508 constexpr Parser procInterface; // R1513 constexpr Parser procDecl; // R1515 constexpr Parser functionReference; // R1520 constexpr Parser actualArgSpec; // R1523 constexpr Parser prefixSpec; // R1527 constexpr Parser functionSubprogram; // R1529 constexpr Parser functionStmt; // R1530 constexpr Parser suffix; // R1532 constexpr Parser endFunctionStmt; // R1533 constexpr Parser subroutineSubprogram; // R1534 constexpr Parser subroutineStmt; // R1535 constexpr Parser dummyArg; // R1536 constexpr Parser endSubroutineStmt; // R1537 constexpr Parser entryStmt; // R1541 constexpr Parser containsStmt; // R1543 // For a parser p, indirect(p) returns a parser that builds an indirect // reference to p's return type. template inline constexpr auto indirect(const PA &p) { return construct>{}(p); } // R711 digit-string -> digit [digit]... // N.B. not a token -- no spaces are skipped constexpr auto digitString = DigitString{}; // statement(p) parses Statement

for some statement type P that is the // result type of the argument parser p, while also handling labels and // end-of-statement markers. // R611 label -> digit [digit]... constexpr auto label = spaces >> digitString; static inline bool isColumnOkForFixedFormLabel(int &&column) { return column < 6; } constexpr auto isLabelOk = inFixedForm >> applyFunction(isColumnOkForFixedFormLabel, getColumn) || pure(true); template using statementConstructor = construct>; template inline constexpr auto unterminatedStatement(const PA &p) { return skipMany("\n"_tok) >> statementConstructor{}(getPosition, maybe(label), isLabelOk, spaces >> p); } constexpr auto endOfLine = CharMatch<'\n'>{} || fail("expected end of line"); constexpr auto endOfStmt = spaces >> CharMatch<';'>{} / skipMany(";"_tok) || endOfLine; template inline constexpr auto statement(const PA &p) { return unterminatedStatement(p) / endOfStmt; } // R507 declaration-construct -> // specification-construct | data-stmt | format-stmt | // entry-stmt | stmt-function-stmt TYPE_CONTEXT_PARSER("declaration construct", construct{}(specificationConstruct) || construct{}(statement(indirect(dataStmt))) || construct{}(statement(indirect(formatStmt))) || construct{}(statement(indirect(entryStmt))) || construct{}( statement(indirect(Parser{})))) // R508 specification-construct -> // derived-type-def | enum-def | generic-stmt | interface-block | // parameter-stmt | procedure-declaration-stmt | // other-specification-stmt | type-declaration-stmt TYPE_CONTEXT_PARSER("specification construct", construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}( statement(indirect(Parser{}))) || construct{}(indirect(interfaceBlock)) || construct{}( statement(indirect(parameterStmt))) || construct{}( statement(indirect(Parser{}))) || construct{}( statement(Parser{})) || construct{}( statement(indirect(typeDeclarationStmt))) || construct{}(indirect(Parser{}))) // R513 other-specification-stmt -> // access-stmt | allocatable-stmt | asynchronous-stmt | bind-stmt | // codimension-stmt | contiguous-stmt | dimension-stmt | external-stmt | // intent-stmt | intrinsic-stmt | namelist-stmt | optional-stmt | // pointer-stmt | protected-stmt | save-stmt | target-stmt | // volatile-stmt | value-stmt | common-stmt | equivalence-stmt TYPE_PARSER( construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{}))) // R516 keyword -> name constexpr auto keyword = name; // R604 constant -> literal-constant | named-constant // Used only via R607 int-constant and R845 data-stmt-constant. TYPE_PARSER(construct{}(literalConstant) || construct{}(namedConstant)) // R608 intrinsic-operator -> // power-op | mult-op | add-op | concat-op | rel-op | // not-op | and-op | or-op | equiv-op // R610 extended-intrinsic-op -> intrinsic-operator // These parsers must be ordered carefully to avoid misrecognition. constexpr auto namedIntrinsicOperator = ".LT." >> pure(DefinedOperator::IntrinsicOperator::LT) || ".LE." >> pure(DefinedOperator::IntrinsicOperator::LE) || ".EQ." >> pure(DefinedOperator::IntrinsicOperator::EQ) || ".NE." >> pure(DefinedOperator::IntrinsicOperator::NE) || ".GE." >> pure(DefinedOperator::IntrinsicOperator::GE) || ".GT." >> pure(DefinedOperator::IntrinsicOperator::GT) || ".NOT." >> pure(DefinedOperator::IntrinsicOperator::NOT) || ".AND." >> pure(DefinedOperator::IntrinsicOperator::AND) || ".OR." >> pure(DefinedOperator::IntrinsicOperator::OR) || ".EQV." >> pure(DefinedOperator::IntrinsicOperator::EQV) || ".NEQV." >> pure(DefinedOperator::IntrinsicOperator::NEQV); // Cray and ifort also have .XOR.; Cray has .N./.A./.O./.X. abbreviations constexpr auto intrinsicOperator = "**" >> pure(DefinedOperator::IntrinsicOperator::Power) || "*" >> pure(DefinedOperator::IntrinsicOperator::Multiply) || "//" >> pure(DefinedOperator::IntrinsicOperator::Concat) || "/=" >> pure(DefinedOperator::IntrinsicOperator::NE) || "/" >> pure(DefinedOperator::IntrinsicOperator::Divide) || "+" >> pure(DefinedOperator::IntrinsicOperator::Add) || "-" >> pure(DefinedOperator::IntrinsicOperator::Subtract) || "<=" >> pure(DefinedOperator::IntrinsicOperator::LE) || extension("<>" >> pure(DefinedOperator::IntrinsicOperator::NE)) || "<" >> pure(DefinedOperator::IntrinsicOperator::LT) || "==" >> pure(DefinedOperator::IntrinsicOperator::EQ) || ">=" >> pure(DefinedOperator::IntrinsicOperator::GE) || ">" >> pure(DefinedOperator::IntrinsicOperator::GT) || namedIntrinsicOperator; // R609 defined-operator -> // defined-unary-op | defined-binary-op | extended-intrinsic-op TYPE_PARSER(construct{}(intrinsicOperator) || construct{}(definedOpName)) // R401 xzy-list -> xzy [, xzy]... template inline constexpr auto nonemptyList(const PA &p) { return nonemptySeparated(p, ","_tok); // p-list } template inline constexpr auto optionalList(const PA &p) { return defaulted(nonemptySeparated(p, ","_tok)); // [p-list] } // R402 xzy-name -> name // R403 scalar-xyz -> xyz // Also define constant-xyz, int-xyz, default-char-xyz. template inline constexpr auto scalar(const PA &p) { return construct>{}(p); // scalar-p } template inline constexpr auto constant(const PA &p) { return construct>{}(p); // constant-p } template inline constexpr auto integer(const PA &p) { return construct>{}(p); // int-p } template inline constexpr auto logical(const PA &p) { return construct>{}(p); // logical-p } template inline constexpr auto defaultChar(const PA &p) { return construct>{}( p); // default-char-p } // R1024 logical-expr -> expr constexpr auto logicalExpr = logical(indirect(expr)); constexpr auto scalarLogicalExpr = scalar(logicalExpr); // R1025 default-char-expr -> expr constexpr auto defaultCharExpr = defaultChar(indirect(expr)); constexpr auto scalarDefaultCharExpr = scalar(defaultCharExpr); // R1026 int-expr -> expr constexpr auto intExpr = integer(indirect(expr)); constexpr auto scalarIntExpr = scalar(intExpr); // R1029 constant-expr -> expr constexpr auto constantExpr = constant(indirect(expr)); // R1030 default-char-constant-expr -> default-char-expr constexpr auto scalarDefaultCharConstantExpr = scalar(defaultChar(constantExpr)); // R1031 int-constant-expr -> int-expr constexpr auto intConstantExpr = integer(constantExpr); constexpr auto scalarIntConstantExpr = scalar(intConstantExpr); // R501 program -> program-unit [program-unit]... // This is the top-level production for the Fortran language. struct StartNewSubprogram { using resultType = Success; static std::optional Parse(ParseState *state) { if (auto ustate = state->userState()) { ustate->NewSubprogram(); } return {Success{}}; } } startNewSubprogram; TYPE_PARSER(construct{}( some(startNewSubprogram >> Parser{} / endOfLine)) / skipMany(endOfLine) / consumedAllInput) // R502 program-unit -> // main-program | external-subprogram | module | submodule | block-data // R503 external-subprogram -> function-subprogram | subroutine-subprogram TYPE_PARSER(construct{}(indirect(functionSubprogram)) || construct{}(indirect(subroutineSubprogram)) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{}))) // R504 specification-part -> // [use-stmt]... [import-stmt]... [implicit-part] // [declaration-construct]... TYPE_CONTEXT_PARSER("specification part", construct{}(many(statement(indirect(Parser{}))), many(statement(indirect(Parser{}))), implicitPart, many(declarationConstruct))) // R505 implicit-part -> [implicit-part-stmt]... implicit-stmt // TODO: Can overshoot; any trailing PARAMETER, FORMAT, & ENTRY // statements after the last IMPLICIT should be transferred to the // list of declaration-constructs. TYPE_CONTEXT_PARSER("implicit part", construct{}(many(Parser{}))) // R506 implicit-part-stmt -> // implicit-stmt | parameter-stmt | format-stmt | entry-stmt TYPE_PARSER(construct{}( statement(indirect(Parser{}))) || construct{}(statement(indirect(parameterStmt))) || construct{}(statement(indirect(formatStmt))) || construct{}(statement(indirect(entryStmt)))) // R512 internal-subprogram -> function-subprogram | subroutine-subprogram constexpr auto internalSubprogram = (construct{}(indirect(functionSubprogram)) || construct{}(indirect(subroutineSubprogram))) / endOfStmt; // R511 internal-subprogram-part -> contains-stmt [internal-subprogram]... TYPE_CONTEXT_PARSER("internal subprogram part", construct{}(statement(containsStmt), many(startNewSubprogram >> internalSubprogram))) // R515 action-stmt -> // allocate-stmt | assignment-stmt | backspace-stmt | call-stmt | // close-stmt | continue-stmt | cycle-stmt | deallocate-stmt | // endfile-stmt | error-stop-stmt | event-post-stmt | event-wait-stmt | // exit-stmt | fail-image-stmt | flush-stmt | form-team-stmt | // goto-stmt | if-stmt | inquire-stmt | lock-stmt | nullify-stmt | // open-stmt | pointer-assignment-stmt | print-stmt | read-stmt | // return-stmt | rewind-stmt | stop-stmt | sync-all-stmt | // sync-images-stmt | sync-memory-stmt | sync-team-stmt | unlock-stmt | // wait-stmt | where-stmt | write-stmt | computed-goto-stmt | forall-stmt // R1159 continue-stmt -> CONTINUE // R1163 fail-image-stmt -> FAIL IMAGE constexpr auto actionStmt = construct{}( indirect(Parser{})) || construct{}(indirect(assignmentStmt)) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || "CONTINUE" >> construct{}(construct{}) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || "FAIL IMAGE" >> construct{}(construct{}) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(pointerAssignmentStmt)) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}( indirect(Parser{})) || // & error-stop-stmt construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(whereStmt)) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(forallStmt)) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})) || construct{}(indirect(Parser{})); // Fortran allows the statement with the corresponding label at the end of // a do-construct that begins with an old-style label-do-stmt to be a // new-style END DO statement; e.g., DO 10 I=1,N; ...; 10 END DO. Usually, // END DO statements appear only at the ends of do-constructs that begin // with a nonlabel-do-stmt, so care must be taken to recognize this case and // essentially treat them like CONTINUE statements. struct CapturedLabelDoStmt { static constexpr auto parser = statement(indirect(Parser{})); using resultType = Statement>; static std::optional Parse(ParseState *state) { auto result = parser.Parse(state); if (result) { if (auto ustate = state->userState()) { ustate->NewDoLabel(std::get