#ifndef FORTRAN_PARSER_PRESCAN_H_ #define FORTRAN_PARSER_PRESCAN_H_ // Defines a fast Fortran source prescanning phase that implements some // character-level features of the language that can be inefficient to // support directly in a backtracking parser. This phase handles Fortran // line continuation, comment removal, card image margins, padding out // fixed form character literals on truncated card images, file // inclusion, and driving the Fortran source preprocessor. #include "characters.h" #include "message.h" #include "provenance.h" #include "token-sequence.h" #include #include #include #include namespace Fortran { namespace parser { class Messages; class Preprocessor; class Prescanner { public: Prescanner(Messages &, CookedSource &, Preprocessor &); Prescanner(const Prescanner &); Messages &messages() const { return messages_; } Prescanner &set_fixedForm(bool yes) { inFixedForm_ = yes; return *this; } Prescanner &set_encoding(Encoding code) { encoding_ = code; return *this; } Prescanner &set_enableOldDebugLines(bool yes) { enableOldDebugLines_ = yes; return *this; } Prescanner &set_enableBackslashEscapesInCharLiterals(bool yes) { enableBackslashEscapesInCharLiterals_ = yes; return *this; } Prescanner &set_fixedFormColumnLimit(int limit) { fixedFormColumnLimit_ = limit; return *this; } Prescanner &set_warnOnNonstandardUsage(bool yes) { warnOnNonstandardUsage_ = yes; return *this; } Prescanner &AddCompilerDirectiveSentinel(const std::string &); void Prescan(ProvenanceRange); void Statement(); void NextLine(); // Callbacks for use by Preprocessor. bool IsAtEnd() const { return lineStart_ >= limit_; } bool IsNextLinePreprocessorDirective() const; TokenSequence TokenizePreprocessorDirective(); Provenance GetCurrentProvenance() const { return GetProvenance(at_); } void Say(Message &&); void Say(MessageFixedText, Provenance); void Say(MessageFormattedText &&, Provenance); private: struct LineClassification { enum class Kind { Comment, PreprocessorDirective, Include, CompilerDirective, Source }; LineClassification(Kind k, std::size_t po = 0, const char *s = nullptr) : kind{k}, payloadOffset{po}, sentinel{s} {} LineClassification(LineClassification &&) = default; Kind kind; std::size_t payloadOffset; // byte offset of content const char *sentinel; // if it's a compiler directive }; void BeginSourceLine(const char *at) { at_ = at; column_ = 1; tabInCurrentLine_ = false; slashInCurrentLine_ = false; preventHollerith_ = false; delimiterNesting_ = 0; } void BeginSourceLineAndAdvance() { BeginSourceLine(lineStart_); NextLine(); } Provenance GetProvenance(const char *sourceChar) const { return startProvenance_ + (sourceChar - start_); } void EmitChar(TokenSequence *tokens, char ch) { tokens->PutNextTokenChar(ch, GetCurrentProvenance()); } void EmitInsertedChar(TokenSequence *tokens, char ch) { Provenance provenance{cooked_.allSources().CompilerInsertionProvenance(ch)}; tokens->PutNextTokenChar(ch, provenance); } char EmitCharAndAdvance(TokenSequence *tokens, char ch) { EmitChar(tokens, ch); NextChar(); return *at_; } void LabelField(TokenSequence *); void NextChar(); void SkipSpaces(); bool NextToken(TokenSequence *); bool ExponentAndKind(TokenSequence *); void QuotedCharacterLiteral(TokenSequence *); void Hollerith(TokenSequence *, int); bool PadOutCharacterLiteral(TokenSequence *); void SkipCommentLinesAndPreprocessorDirectives(); bool IsFixedFormCommentLine(const char *) const; bool IsFreeFormComment(const char *) const; std::optional IsIncludeLine(const char *) const; void FortranInclude(const char *quote); bool IsPreprocessorDirectiveLine(const char *) const; const char *FixedFormContinuationLine(); bool FixedFormContinuation(); bool FreeFormContinuation(); std::optional IsFixedFormCompilerDirectiveLine( const char *) const; std::optional IsFreeFormCompilerDirectiveLine( const char *) const; const char *IsCompilerDirectiveSentinel(const char *) const; LineClassification ClassifyLine(const char *) const; void SourceFormChange(std::string &&); Messages &messages_; CookedSource &cooked_; Preprocessor &preprocessor_; bool anyFatalErrors_{false}; bool inFixedForm_{false}; int fixedFormColumnLimit_{72}; Encoding encoding_{Encoding::UTF8}; bool enableOldDebugLines_{false}; bool enableBackslashEscapesInCharLiterals_{true}; bool warnOnNonstandardUsage_{false}; int delimiterNesting_{0}; int prescannerNesting_{0}; Provenance startProvenance_; const char *start_{nullptr}; // beginning of current source file content const char *limit_{nullptr}; // first address after end of current source const char *lineStart_{nullptr}; // next line to process; <= limit_ const char *directiveSentinel_{nullptr}; // current compiler directive // This data members are state for processing the source line containing // "at_", which goes to up to the newline character before "lineStart_". const char *at_{nullptr}; // next character to process; < lineStart_ int column_{1}; // card image column position of next character bool tabInCurrentLine_{false}; bool slashInCurrentLine_{false}; bool preventHollerith_{false}; bool inCharLiteral_{false}; bool inPreprocessorDirective_{false}; const Provenance spaceProvenance_{ cooked_.allSources().CompilerInsertionProvenance(' ')}; const Provenance backslashProvenance_{ cooked_.allSources().CompilerInsertionProvenance('\\')}; const ProvenanceRange sixSpaceProvenance_{ cooked_.allSources().AddCompilerInsertion(" "s)}; // To avoid probing the set of active compiler directive sentinel strings // on every comment line, they're checked first with a cheap Bloom filter. static const int prime1{1019}, prime2{1021}; std::bitset compilerDirectiveBloomFilter_; // 128 bytes std::unordered_set compilerDirectiveSentinels_; }; } // namespace parser } // namespace Fortran #endif // FORTRAN_PARSER_PRESCAN_H_