llvm/flang/lib/Parser/prescan.cpp
peter klausler 094b380c21 [flang] Catch mismatched parentheses in prescanner
Source lines with mismatched parentheses are hard cases for error
recovery in parsing, and the best error message (viz.,
"here's an unmatched parenthesis") can be emitted from the
prescanner.

Differential Revision: https://reviews.llvm.org/D111254#3046173
2021-10-06 14:00:15 -07:00

1235 lines
37 KiB
C++

//===-- lib/Parser/prescan.cpp --------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "prescan.h"
#include "preprocessor.h"
#include "token-sequence.h"
#include "flang/Common/idioms.h"
#include "flang/Parser/characters.h"
#include "flang/Parser/message.h"
#include "flang/Parser/source.h"
#include "llvm/Support/raw_ostream.h"
#include <cstddef>
#include <cstring>
#include <utility>
#include <vector>
namespace Fortran::parser {
using common::LanguageFeature;
// Upper bound on prescannerNesting_: Prescan() emits an error instead of
// scanning once the nesting depth of a prescanner exceeds this value.
static constexpr int maxPrescannerNesting{100};
// Constructs a prescanner that reports to `messages`, emits to `cooked`,
// and uses `preprocessor` for directives and macro replacement.  The set of
// sources and the initial character encoding are taken from the
// preprocessor's AllSources; `lfc` supplies the language feature settings.
Prescanner::Prescanner(Messages &messages, CookedSource &cooked,
Preprocessor &preprocessor, common::LanguageFeatureControl lfc)
: messages_{messages}, cooked_{cooked}, preprocessor_{preprocessor},
allSources_{preprocessor_.allSources()}, features_{lfc},
encoding_{allSources_.encoding()} {}
// Constructs a nested prescanner from `that`.  Only the members listed in
// the initializer list are carried over (form, column limit, encoding,
// directive sentinels, pending leading-ampersand skip); the nesting depth
// is one greater than that of `that`, which is what Prescan() checks
// against maxPrescannerNesting.
Prescanner::Prescanner(const Prescanner &that)
: messages_{that.messages_}, cooked_{that.cooked_},
preprocessor_{that.preprocessor_}, allSources_{that.allSources_},
features_{that.features_}, inFixedForm_{that.inFixedForm_},
fixedFormColumnLimit_{that.fixedFormColumnLimit_},
encoding_{that.encoding_}, prescannerNesting_{that.prescannerNesting_ +
1},
skipLeadingAmpersand_{that.skipLeadingAmpersand_},
compilerDirectiveBloomFilter_{that.compilerDirectiveBloomFilter_},
compilerDirectiveSentinels_{that.compilerDirectiveSentinels_} {}
// True when `ch` is one of the characters ('!', '*', 'C', 'c') that this
// file treats as a fixed-form comment marker.
static inline constexpr bool IsFixedFormCommentChar(char ch) {
  switch (ch) {
  case '!':
  case '*':
  case 'C':
  case 'c':
    return true;
  default:
    return false;
  }
}
// Replaces the first non-blank character of `dir`, which must be a
// fixed-form comment character, with '!'.  Dies if `dir` holds only blanks.
static void NormalizeCompilerDirectiveCommentMarker(TokenSequence &dir) {
  char *marker{dir.GetMutableCharData()};
  char *const end{marker + dir.SizeInChars()};
  while (marker < end && *marker == ' ') {
    ++marker;
  }
  if (marker < end) {
    CHECK(IsFixedFormCommentChar(*marker));
    *marker = '!';
  } else {
    DIE("compiler directive all blank");
  }
}
// Prescans all of the source text covered by `range`, one statement at a
// time.  If the source form (fixed vs. free) differs at the end from what
// it was at the start, a "!dir$ fixed" or "!dir$ free" line is emitted to
// the cooked stream to restore the original form.
void Prescanner::Prescan(ProvenanceRange range) {
  startProvenance_ = range.start();
  start_ = allSources_.GetSource(range);
  CHECK(start_);
  limit_ = start_ + range.size();
  nextLine_ = start_;
  const bool wasFixedForm{inFixedForm_};
  if (prescannerNesting_ > maxPrescannerNesting) {
    Say(GetProvenance(start_),
        "too many nested INCLUDE/#include files, possibly circular"_err_en_US);
    return;
  }
  for (; !IsAtEnd();) {
    Statement();
  }
  if (inFixedForm_ == wasFixedForm) {
    return; // source form unchanged: nothing to restore
  }
  std::string dir{"!dir$ "};
  dir += wasFixedForm ? "fixed" : "free";
  dir += '\n';
  TokenSequence tokens{dir, allSources_.AddCompilerInsertion(dir).start()};
  tokens.Emit(cooked_);
}
// Processes the statement that begins at nextLine_.
// The line is classified first: comment lines are skipped, INCLUDE lines
// and preprocessor directives are handed off and the function returns.
// Compiler directives and ordinary source lines are tokenized into
// `tokens` (including any continuation lines consumed by NextToken()),
// subjected to macro replacement, checked, and emitted to cooked_,
// followed by a newline unless omitNewline_ was set during scanning.
void Prescanner::Statement() {
TokenSequence tokens;
LineClassification line{ClassifyLine(nextLine_)};
switch (line.kind) {
case LineClassification::Kind::Comment:
nextLine_ += line.payloadOffset; // advance to '!' or newline
NextLine();
return;
case LineClassification::Kind::IncludeLine:
FortranInclude(nextLine_ + line.payloadOffset);
NextLine();
return;
case LineClassification::Kind::ConditionalCompilationDirective:
case LineClassification::Kind::IncludeDirective:
case LineClassification::Kind::DefinitionDirective:
case LineClassification::Kind::PreprocessorDirective:
preprocessor_.Directive(TokenizePreprocessorDirective(), *this);
return;
case LineClassification::Kind::CompilerDirective:
// Remember the sentinel for the duration of this statement; it is reset
// to nullptr at the end of this function.
directiveSentinel_ = line.sentinel;
CHECK(InCompilerDirective());
BeginStatementAndAdvance();
if (inFixedForm_) {
CHECK(IsFixedFormCommentChar(*at_));
} else {
while (*at_ == ' ' || *at_ == '\t') {
++at_, ++column_;
}
CHECK(*at_ == '!');
}
if (directiveSentinel_[0] == '$' && directiveSentinel_[1] == '\0') {
// OpenMP conditional compilation line. Remove the sentinel and then
// treat the line as if it were normal source.
at_ += 2, column_ += 2;
if (inFixedForm_) {
LabelField(tokens);
} else {
SkipSpaces();
}
} else {
// Compiler directive. Emit normalized sentinel.
EmitChar(tokens, '!');
++at_, ++column_;
for (const char *sp{directiveSentinel_}; *sp != '\0';
++sp, ++at_, ++column_) {
EmitChar(tokens, *sp);
}
if (*at_ == ' ') {
EmitChar(tokens, ' ');
++at_, ++column_;
}
tokens.CloseToken();
}
break;
case LineClassification::Kind::Source:
BeginStatementAndAdvance();
if (inFixedForm_) {
LabelField(tokens);
} else if (skipLeadingAmpersand_) {
// Set by SkipCommentLine() when an include line followed a trailing '&';
// a leading '&' on this line is skipped rather than tokenized.
skipLeadingAmpersand_ = false;
const char *p{SkipWhiteSpace(at_)};
if (p < limit_ && *p == '&') {
column_ += ++p - at_;
at_ = p;
}
} else {
SkipSpaces();
}
break;
}
// Tokenize the remainder of the statement.
while (NextToken(tokens)) {
}
Provenance newlineProvenance{GetCurrentProvenance()};
if (std::optional<TokenSequence> preprocessed{
preprocessor_.MacroReplacement(tokens, *this)}) {
// Reprocess the preprocessed line. Append a newline temporarily.
preprocessed->PutNextTokenChar('\n', newlineProvenance);
preprocessed->CloseToken();
const char *ppd{preprocessed->ToCharBlock().begin()};
LineClassification ppl{ClassifyLine(ppd)};
preprocessed->pop_back(); // remove the newline
switch (ppl.kind) {
case LineClassification::Kind::Comment:
break;
case LineClassification::Kind::IncludeLine:
// NOTE(review): FortranInclude() scans for '\n', but the temporary
// newline was popped from `preprocessed` just above; confirm that the
// character data reachable from ppd is still newline-terminated here.
FortranInclude(ppd + ppl.payloadOffset);
break;
case LineClassification::Kind::ConditionalCompilationDirective:
case LineClassification::Kind::IncludeDirective:
case LineClassification::Kind::DefinitionDirective:
case LineClassification::Kind::PreprocessorDirective:
// Not re-interpreted as a directive: a message is issued and the line is
// emitted like other text.
Say(preprocessed->GetProvenanceRange(),
"Preprocessed line resembles a preprocessor directive"_en_US);
preprocessed->ToLowerCase()
.CheckBadFortranCharacters(messages_)
.CheckBadParentheses(messages_)
.Emit(cooked_);
break;
case LineClassification::Kind::CompilerDirective:
if (preprocessed->HasRedundantBlanks()) {
preprocessed->RemoveRedundantBlanks();
}
NormalizeCompilerDirectiveCommentMarker(*preprocessed);
preprocessed->ToLowerCase();
SourceFormChange(preprocessed->ToString());
preprocessed->ClipComment(true /* skip first ! */)
.CheckBadFortranCharacters(messages_)
.CheckBadParentheses(messages_)
.Emit(cooked_);
break;
case LineClassification::Kind::Source:
if (inFixedForm_) {
if (preprocessed->HasBlanks(/*after column*/ 6)) {
preprocessed->RemoveBlanks(/*after column*/ 6);
}
} else {
if (preprocessed->HasRedundantBlanks()) {
preprocessed->RemoveRedundantBlanks();
}
}
preprocessed->ToLowerCase()
.ClipComment()
.CheckBadFortranCharacters(messages_)
.CheckBadParentheses(messages_)
.Emit(cooked_);
break;
}
} else {
// No macro replacement took place: emit the original tokens.
tokens.ToLowerCase();
if (line.kind == LineClassification::Kind::CompilerDirective) {
SourceFormChange(tokens.ToString());
}
if (inFixedForm_ && line.kind == LineClassification::Kind::Source) {
EnforceStupidEndStatementRules(tokens);
}
tokens.CheckBadFortranCharacters(messages_)
.CheckBadParentheses(messages_)
.Emit(cooked_);
}
if (omitNewline_) {
omitNewline_ = false;
} else {
cooked_.Put('\n', newlineProvenance);
}
directiveSentinel_ = nullptr;
}
// Tokenizes the preprocessor directive that starts on the next line and
// returns its tokens.  inPreprocessorDirective_ is set only for the
// duration of the scan; it must not already be set on entry.
TokenSequence Prescanner::TokenizePreprocessorDirective() {
  CHECK(!IsAtEnd() && !inPreprocessorDirective_);
  inPreprocessorDirective_ = true;
  BeginStatementAndAdvance();
  TokenSequence directive;
  for (;;) {
    if (!NextToken(directive)) {
      break;
    }
  }
  inPreprocessorDirective_ = false;
  return directive;
}
// Advances nextLine_ to the character just past the next newline found
// in [nextLine_, limit_), or to limit_ when there is no such newline.
void Prescanner::NextLine() {
  const void *newline{std::memchr(nextLine_, '\n', limit_ - nextLine_)};
  nextLine_ = newline ? static_cast<const char *>(newline) + 1 : limit_;
}
// Scans the fixed-form label field (columns 1 through 6) at at_ and emits
// its non-blank characters to `token`.  A tab ends the field early and
// moves the column to 7.  If the field contains a non-digit and its text
// is not the name of a defined macro, a message is issued, the token is
// discarded, and at_ is rewound to the start of the field.
void Prescanner::LabelField(TokenSequence &token) {
// First non-digit character emitted from the label field, if any.
const char *bad{nullptr};
// One more than the number of characters emitted so far.
int outCol{1};
const char *start{at_};
for (; *at_ != '\n' && column_ <= 6; ++at_) {
if (*at_ == '\t') {
++at_;
column_ = 7;
break;
}
if (*at_ != ' ' &&
!(*at_ == '0' && column_ == 6)) { // '0' in column 6 becomes space
EmitChar(token, *at_);
++outCol;
if (!bad && !IsDecimalDigit(*at_)) {
bad = at_;
}
}
++column_;
}
if (bad && !preprocessor_.IsNameDefined(token.CurrentOpenToken())) {
Say(GetProvenance(bad),
"Character in fixed-form label field must be a digit"_en_US);
token.clear();
// NOTE(review): at_ is rewound here but column_ keeps its advanced value;
// confirm that this is intended.
at_ = start;
return;
}
if (outCol == 1) { // empty label field
// Emit a space so that, if the line is rescanned after preprocessing,
// a leading 'C' or 'D' won't be left-justified and then accidentally
// misinterpreted as a comment card.
EmitChar(token, ' ');
++outCol;
}
token.CloseToken();
SkipToNextSignificantCharacter();
if (IsDecimalDigit(*at_)) {
Say(GetProvenance(at_),
"Label digit is not in fixed-form label field"_en_US);
}
}
// 6.3.3.5: A program unit END statement, or any other statement whose
// initial line resembles an END statement, shall not be continued in
// fixed form source.
// `tokens` is the (already lower-cased) text of one fixed-form statement.
// Nothing is reported unless the text begins with "end" (after blanks and
// digits) and its first and last characters come from different source
// lines, i.e., the statement was continued.
void Prescanner::EnforceStupidEndStatementRules(const TokenSequence &tokens) {
CharBlock cBlock{tokens.ToCharBlock()};
const char *str{cBlock.begin()};
std::size_t n{cBlock.size()};
if (n < 3) {
return;
}
// Skip over blanks and label digits.
std::size_t j{0};
for (; j < n && (str[j] == ' ' || (str[j] >= '0' && str[j] <= '9')); ++j) {
}
if (j + 3 > n || std::memcmp(str + j, "end", 3) != 0) {
return;
}
// It starts with END, possibly after a label.
auto start{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))};
auto end{allSources_.GetSourcePosition(tokens.GetCharProvenance(n - 1))};
if (!start || !end) {
return;
}
if (&start->file == &end->file && start->line == end->line) {
return; // no continuation
}
j += 3;
static const char *const prefixes[]{"program", "subroutine", "function",
"blockdata", "module", "submodule", nullptr};
bool isPrefix{j == n || !IsLegalInIdentifier(str[j])}; // prefix is END
std::size_t endOfPrefix{j - 1};
// Look for a program unit keyword directly after END; if one is found,
// j is advanced past it and past any identifier characters that follow.
for (const char *const *p{prefixes}; *p; ++p) {
std::size_t pLen{std::strlen(*p)};
if (j + pLen <= n && std::memcmp(str + j, *p, pLen) == 0) {
isPrefix = true; // END thing as prefix
j += pLen;
endOfPrefix = j - 1;
for (; j < n && IsLegalInIdentifier(str[j]); ++j) {
}
break;
}
}
if (isPrefix) {
auto range{tokens.GetTokenProvenanceRange(1)};
if (j == n) { // END or END thing [name]
Say(range,
"Program unit END statement may not be continued in fixed form source"_err_en_US);
} else {
// More text follows.  Complain only when the END prefix lies entirely on
// the initial line and the character at index j comes from another line.
auto endOfPrefixPos{
allSources_.GetSourcePosition(tokens.GetCharProvenance(endOfPrefix))};
auto next{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))};
if (endOfPrefixPos && next && &endOfPrefixPos->file == &start->file &&
endOfPrefixPos->line == start->line &&
(&next->file != &start->file || next->line != start->line)) {
Say(range,
"Initial line of continued statement must not appear to be a program unit END in fixed form source"_err_en_US);
}
}
}
}
// Moves at_ (and column_ along with it) forward to the next newline.
void Prescanner::SkipToEndOfLine() {
  for (; *at_ != '\n'; ++at_) {
    ++column_;
  }
}
// True when the rest of the current line is to be ignored: either the
// column limit has been passed in fixed form on a line without a tab
// (the right margin), or at_ is at a '!' outside of a character literal
// (an inline comment).
bool Prescanner::MustSkipToEndOfLine() const {
  return (inFixedForm_ && column_ > fixedFormColumnLimit_ &&
             !tabInCurrentLine_) ||
      (*at_ == '!' && !inCharLiteral_);
}
// Steps past the current character, which must not be a newline, then
// past any UTF-8 byte order marks (switching the encoding to UTF-8 when
// one is seen), and finally past anything that is not significant.
void Prescanner::NextChar() {
  CHECK(*at_ != '\n');
  ++at_;
  ++column_;
  for (;;) {
    const bool atByteOrderMark{
        at_[0] == '\xef' && at_[1] == '\xbb' && at_[2] == '\xbf'};
    if (!atByteOrderMark) {
      break;
    }
    at_ += 3;
    encoding_ = Encoding::UTF_8;
  }
  SkipToNextSignificantCharacter();
}
// Advances at_ over text that carries no meaning: C-style comments when
// in a preprocessing directive; otherwise Fortran '!' comments, the
// ignored right margin of fixed form, and line continuations.  Notes
// when the scan stops on a tab character.
void Prescanner::SkipToNextSignificantCharacter() {
  if (inPreprocessorDirective_) {
    SkipCComments();
    return;
  }
  bool mightNeedSpace{false};
  if (MustSkipToEndOfLine()) {
    SkipToEndOfLine();
  } else {
    mightNeedSpace = *at_ == '\n';
  }
  while (Continuation(mightNeedSpace)) {
    if (MustSkipToEndOfLine()) {
      SkipToEndOfLine();
    }
    // Only the first continuation attempt may need a space.
    mightNeedSpace = false;
  }
  if (*at_ == '\t') {
    tabInCurrentLine_ = true;
  }
}
// Repeatedly skips C-style comments at at_ and, within a preprocessing
// directive, backslash-newline line splices.
void Prescanner::SkipCComments() {
  for (;;) {
    if (IsCComment(at_)) {
      const char *after{SkipCComment(at_)};
      if (!after) {
        // Unterminated: say nothing, because the sequence /* can appear
        // legally in a FORMAT statement.  There's no ambiguity, since the
        // sequence */ cannot appear legally.
        return;
      }
      column_ += after - at_;
      // One or more newlines may have been skipped, so the start of the
      // next line is recomputed from the end of the comment.
      nextLine_ = at_ = after;
      NextLine();
      continue;
    }
    const bool atLineSplice{inPreprocessorDirective_ && at_[0] == '\\' &&
        at_ + 2 < limit_ && at_[1] == '\n' && !IsAtEnd()};
    if (!atLineSplice) {
      return;
    }
    BeginSourceLineAndAdvance();
  }
}
// Advances over blanks and tabs via NextChar() and cancels any pending
// request to insert a space.
void Prescanner::SkipSpaces() {
  for (char ch{*at_}; ch == ' ' || ch == '\t'; ch = *at_) {
    NextChar();
  }
  insertASpace_ = false;
}
// Returns a pointer to the first character at or after `p` that is
// neither a blank nor a tab.
const char *Prescanner::SkipWhiteSpace(const char *p) {
  for (; *p == ' ' || *p == '\t'; ++p) {
  }
  return p;
}
// Returns a pointer past any mixture of blanks, tabs, and terminated
// C-style comments that begins at `p`.  An unterminated comment is not
// skipped.
const char *Prescanner::SkipWhiteSpaceAndCComments(const char *p) const {
  for (;;) {
    if (*p == ' ' || *p == '\t') {
      ++p;
      continue;
    }
    if (!IsCComment(p)) {
      return p;
    }
    const char *after{SkipCComment(p)};
    if (!after) {
      return p;
    }
    p = after;
  }
}
// Given `p` at the start of a C-style comment, steps over its first two
// characters and returns a pointer just past the closing "*/", or
// nullptr when limit_ is reached before the comment is closed.
const char *Prescanner::SkipCComment(const char *p) const {
  char previous{' '}, current{' '};
  for (p += 2; !(previous == '*' && current == '/'); current = *p++) {
    if (p >= limit_) {
      return nullptr; // unterminated comment
    }
    previous = current;
  }
  return p;
}
// Scans the next token at at_ and appends it to `tokens`.
// Returns true when a token was appended and closed; returns false when
// at_ is at a newline, i.e., at the end of the (possibly continued)
// statement or directive.
bool Prescanner::NextToken(TokenSequence &tokens) {
CHECK(at_ >= start_ && at_ < limit_);
if (InFixedFormSource()) {
SkipSpaces();
} else {
if (*at_ == '/' && IsCComment(at_)) {
// Recognize and skip over classic C style /*comments*/ when
// outside a character literal.
if (features_.ShouldWarn(LanguageFeature::ClassicCComments)) {
Say(GetProvenance(at_), "nonstandard usage: C-style comment"_en_US);
}
SkipCComments();
}
if (*at_ == ' ' || *at_ == '\t') {
// Compress free-form white space into a single space character.
const auto theSpace{at_};
char previous{at_ <= start_ ? ' ' : at_[-1]};
NextChar();
SkipSpaces();
if (*at_ == '\n') {
// Discard white space at the end of a line.
} else if (!inPreprocessorDirective_ &&
(previous == '(' || *at_ == '(' || *at_ == ')')) {
// Discard white space before/after '(' and before ')', unless in a
// preprocessor directive. This helps yield space-free contiguous
// names for generic interfaces like OPERATOR( + ) and
// READ ( UNFORMATTED ), without misinterpreting #define f (notAnArg).
// This has the effect of silently ignoring the illegal spaces in
// the array constructor ( /1,2/ ) but that seems benign; it's
// hard to avoid that while still removing spaces from OPERATOR( / )
// and OPERATOR( // ).
} else {
// Preserve the squashed white space as a single space character.
tokens.PutNextTokenChar(' ', GetProvenance(theSpace));
tokens.CloseToken();
return true;
}
}
}
// A space requested by continuation line processing is placed at the
// start of the next token.
if (insertASpace_) {
tokens.PutNextTokenChar(' ', spaceProvenance_);
insertASpace_ = false;
}
if (*at_ == '\n') {
return false;
}
const char *start{at_};
if (*at_ == '\'' || *at_ == '"') {
QuotedCharacterLiteral(tokens, start);
preventHollerith_ = false;
} else if (IsDecimalDigit(*at_)) {
// A digit string: may turn out to be a Hollerith count, a real literal,
// a hexadecimal constant in a directive, or a kind prefix of a quoted
// character literal.
int n{0}, digits{0};
static constexpr int maxHollerith{256 /*lines*/ * (132 - 6 /*columns*/)};
do {
// n stops accumulating once it reaches maxHollerith.
if (n < maxHollerith) {
n = 10 * n + DecimalDigitValue(*at_);
}
EmitCharAndAdvance(tokens, *at_);
++digits;
if (InFixedFormSource()) {
SkipSpaces();
}
} while (IsDecimalDigit(*at_));
if ((*at_ == 'h' || *at_ == 'H') && n > 0 && n < maxHollerith &&
!preventHollerith_) {
Hollerith(tokens, n, start);
} else if (*at_ == '.') {
while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) {
}
ExponentAndKind(tokens);
} else if (ExponentAndKind(tokens)) {
} else if (digits == 1 && n == 0 && (*at_ == 'x' || *at_ == 'X') &&
inPreprocessorDirective_) {
// 0x... hexadecimal constant, recognized only in preprocessor directives
do {
EmitCharAndAdvance(tokens, *at_);
} while (IsHexadecimalDigit(*at_));
} else if (IsLetter(*at_)) {
// Handles FORMAT(3I9HHOLLERITH) by skipping over the first I so that
// we don't misrecognize I9HOLLERITH as an identifier in the next case.
EmitCharAndAdvance(tokens, *at_);
} else if (at_[0] == '_' && (at_[1] == '\'' || at_[1] == '"')) { // 4_"..."
EmitCharAndAdvance(tokens, *at_);
QuotedCharacterLiteral(tokens, start);
}
preventHollerith_ = false;
} else if (*at_ == '.') {
char nch{EmitCharAndAdvance(tokens, '.')};
if (!inPreprocessorDirective_ && IsDecimalDigit(nch)) {
while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) {
}
ExponentAndKind(tokens);
} else if (nch == '.' && EmitCharAndAdvance(tokens, '.') == '.') {
EmitCharAndAdvance(tokens, '.'); // variadic macro definition ellipsis
}
preventHollerith_ = false;
} else if (IsLegalInIdentifier(*at_)) {
// An identifier, possibly serving as the kind prefix of a quoted
// character literal when it ends with an underscore.
do {
} while (IsLegalInIdentifier(EmitCharAndAdvance(tokens, *at_)));
if ((*at_ == '\'' || *at_ == '"') &&
tokens.CharAt(tokens.SizeInChars() - 1) == '_') { // kind_"..."
QuotedCharacterLiteral(tokens, start);
}
preventHollerith_ = false;
} else if (*at_ == '*') {
if (EmitCharAndAdvance(tokens, '*') == '*') {
EmitCharAndAdvance(tokens, '*');
} else {
// Subtle ambiguity:
// CHARACTER*2H declares H because *2 is a kind specifier
// DATAC/N*2H / is repeated Hollerith
preventHollerith_ = !slashInCurrentStatement_;
}
} else {
// Any other character is a token by itself or the first character of a
// two-character token.  Parentheses and brackets update the delimiter
// nesting depth that IsImplicitContinuation() consults.
char ch{*at_};
if (ch == '(' || ch == '[') {
++delimiterNesting_;
} else if ((ch == ')' || ch == ']') && delimiterNesting_ > 0) {
--delimiterNesting_;
}
char nch{EmitCharAndAdvance(tokens, ch)};
preventHollerith_ = false;
if ((nch == '=' &&
(ch == '<' || ch == '>' || ch == '/' || ch == '=' || ch == '!')) ||
(ch == nch &&
(ch == '/' || ch == ':' || ch == '*' || ch == '#' || ch == '&' ||
ch == '|' || ch == '<' || ch == '>')) ||
(ch == '=' && nch == '>')) {
// token comprises two characters
EmitCharAndAdvance(tokens, nch);
} else if (ch == '/') {
slashInCurrentStatement_ = true;
}
}
tokens.CloseToken();
return true;
}
// If at_ is at an 'e' or 'd' (in either case), emits it in lower case
// followed by an optional sign, any decimal digits, and, when an
// underscore comes next, the underscore with the identifier characters
// after it; returns true.  Otherwise emits nothing and returns false.
bool Prescanner::ExponentAndKind(TokenSequence &tokens) {
  const char letter{ToLowerCaseLetter(*at_)};
  if (letter == 'e' || letter == 'd') {
    EmitCharAndAdvance(tokens, letter);
    if (*at_ == '+' || *at_ == '-') {
      EmitCharAndAdvance(tokens, *at_);
    }
    for (; IsDecimalDigit(*at_);) {
      EmitCharAndAdvance(tokens, *at_);
    }
    if (*at_ == '_') {
      // kind suffix
      do {
      } while (IsLegalInIdentifier(EmitCharAndAdvance(tokens, *at_)));
    }
    return true;
  }
  return false;
}
// Scans a quoted character literal whose opening quote mark is at at_ and
// emits it to `tokens`.  `start` marks the beginning of the token (which
// may include a kind prefix) and is used only for the provenance of the
// "Incomplete character literal" error, issued when a newline is reached
// before the closing quote outside of a preprocessor directive.
void Prescanner::QuotedCharacterLiteral(
TokenSequence &tokens, const char *start) {
char quote{*at_};
const char *end{at_ + 1};
inCharLiteral_ = true;
const auto emit{[&](char ch) { EmitChar(tokens, ch); }};
const auto insert{[&](char ch) { EmitInsertedChar(tokens, ch); }};
// True when the character at at_ is a backslash that begins an escape,
// so that a quote mark right after it does not end the literal.
bool isEscaped{false};
bool escapesEnabled{features_.IsEnabled(LanguageFeature::BackslashEscapes)};
while (true) {
if (*at_ == '\\') {
if (escapesEnabled) {
isEscaped = !isEscaped;
} else {
// The parser always processes escape sequences, so don't confuse it
// when escapes are disabled.
insert('\\');
}
} else {
isEscaped = false;
}
EmitQuotedChar(static_cast<unsigned char>(*at_), emit, insert, false,
Encoding::LATIN_1);
while (PadOutCharacterLiteral(tokens)) {
}
if (*at_ == '\n') {
if (!inPreprocessorDirective_) {
Say(GetProvenanceRange(start, end),
"Incomplete character literal"_err_en_US);
}
break;
}
end = at_ + 1;
NextChar();
if (*at_ == quote && !isEscaped) {
// A doubled unescaped quote mark becomes a single instance of that
// quote character in the literal (later). There can be spaces between
// the quotes in fixed form source.
EmitChar(tokens, quote);
inCharLiteral_ = false; // for cases like print *, '...'!comment
NextChar();
if (InFixedFormSource()) {
SkipSpaces();
}
if (*at_ != quote) {
break;
}
inCharLiteral_ = true;
}
}
inCharLiteral_ = false;
}
// Scans the body of a Hollerith literal.  On entry at_ is at the 'h' or
// 'H' that follows the count, whose digits the caller has already
// emitted.  Emits 'H' and then up to `count` characters, re-encoded as
// UTF-8.  `start` is the beginning of the token and is used only for the
// provenance of messages.
void Prescanner::Hollerith(
TokenSequence &tokens, int count, const char *start) {
inCharLiteral_ = true;
CHECK(*at_ == 'h' || *at_ == 'H');
EmitChar(tokens, 'H');
while (count-- > 0) {
if (PadOutCharacterLiteral(tokens)) {
// A padding blank was emitted; it counts as one character.
} else if (*at_ == '\n') {
Say(GetProvenanceRange(start, at_),
"Possible truncated Hollerith literal"_en_US);
break;
} else {
NextChar();
// Each multi-byte character encoding counts as a single character.
// No escape sequences are recognized.
// Hollerith is always emitted to the cooked character
// stream in UTF-8.
DecodedCharacter decoded{DecodeCharacter(
encoding_, at_, static_cast<std::size_t>(limit_ - at_), false)};
if (decoded.bytes > 0) {
EncodedCharacter utf8{
EncodeCharacter<Encoding::UTF_8>(decoded.codepoint)};
for (int j{0}; j < utf8.bytes; ++j) {
EmitChar(tokens, utf8.buffer[j]);
}
// Leave at_ on the last byte of the decoded character.
at_ += decoded.bytes - 1;
} else {
Say(GetProvenanceRange(start, at_),
"Bad character in Hollerith literal"_err_en_US);
break;
}
}
}
if (*at_ != '\n') {
NextChar();
}
inCharLiteral_ = false;
}
// In fixed form, source card images must be processed as if they were at
// least 72 columns wide, at least in character literal contexts.
// Called when at_ may be on the last character before a newline on a
// fixed-form line without tabs.  Returns true after emitting one padding
// blank (when the column limit has not been reached); otherwise tries to
// move on to a continuation line and returns false.
bool Prescanner::PadOutCharacterLiteral(TokenSequence &tokens) {
while (inFixedForm_ && !tabInCurrentLine_ && at_[1] == '\n') {
if (column_ < fixedFormColumnLimit_) {
tokens.PutNextTokenChar(' ', spaceProvenance_);
++column_;
return true;
}
if (!FixedFormContinuation(false /*no need to insert space*/) ||
tabInCurrentLine_) {
return false;
}
CHECK(column_ == 7);
--at_; // point to column 6 of continuation line
column_ = 6;
}
return false;
}
// Decides whether the fixed-form line at `start` is a comment line.
bool Prescanner::IsFixedFormCommentLine(const char *start) const {
  const char col1{*start};
  if (IsFixedFormCommentChar(col1) || col1 == '%' || // VAX %list, %eject, &c.
      ((col1 == 'D' || col1 == 'd') &&
          !features_.IsEnabled(LanguageFeature::OldDebugLines))) {
    return true;
  }
  // Skip leading blanks and tabs, as well as a '0' in column 6 (which
  // must be treated as a space) provided that no tab preceded it.
  bool sawTab{false};
  const char *p{start};
  for (;; ++p) {
    if (*p == '\t') {
      sawTab = true;
      continue;
    }
    if (*p == ' ') {
      continue;
    }
    if (*p == '0' && !sawTab && p == start + 5) {
      continue;
    }
    break;
  }
  // The line is a comment when: it is blank up to the column limit; or
  // its first significant character is a '!' that is not in column 6
  // (with no tabs) and not in a character literal; or it is entirely
  // blank.
  return (!sawTab && p >= start + fixedFormColumnLimit_) ||
      (*p == '!' && !inCharLiteral_ && (sawTab || p != start + 5)) ||
      *p == '\n';
}
// If the line at `p` holds nothing but white space and C-style comments
// before a '!' or its newline, returns a pointer to that '!' or newline;
// otherwise returns nullptr.
const char *Prescanner::IsFreeFormComment(const char *p) const {
  const char *significant{SkipWhiteSpaceAndCComments(p)};
  const bool isComment{*significant == '!' || *significant == '\n'};
  return isComment ? significant : nullptr;
}
// Recognizes a Fortran INCLUDE line: optional white space, the word
// "include" in any letter case, optional white space, and a quote mark.
// Returns the offset of that quote mark from `start`, or nothing when
// the line does not have this shape.
std::optional<std::size_t> Prescanner::IsIncludeLine(const char *start) const {
  const char *p{SkipWhiteSpace(start)};
  for (char expected : "include"s) {
    const char actual{ToLowerCaseLetter(*p)};
    ++p;
    if (actual != expected) {
      return std::nullopt;
    }
  }
  p = SkipWhiteSpace(p);
  const bool atQuote{*p == '"' || *p == '\''};
  if (!atQuote) {
    return std::nullopt;
  }
  return {p - start};
}
// Handles a Fortran INCLUDE line.  `firstQuote` points at or before the
// opening quote mark of the path name.  The quoted path is extracted
// (a doubled quote mark stands for a single one), the file is opened
// relative to the directory of the current source file, and, if it is
// not empty, it is prescanned by a nested prescanner.
void Prescanner::FortranInclude(const char *firstQuote) {
  const char *p{firstQuote};
  for (; *p != '"' && *p != '\''; ++p) {
  }
  const char quote{*p};
  std::string path;
  for (++p; *p != '\n'; ++p) {
    if (*p == quote) {
      if (p[1] != quote) {
        break; // closing quote mark
      }
      ++p; // doubled quote mark
    }
    path.push_back(*p);
  }
  if (*p != quote) {
    Say(GetProvenanceRange(firstQuote, p),
        "malformed path name string"_err_en_US);
    return;
  }
  p = SkipWhiteSpace(p + 1);
  if (*p != '\n' && *p != '!') {
    // Something other than a comment follows the path name.
    const char *garbage{p};
    while (*p != '\n' && *p != '!') {
      ++p;
    }
    Say(GetProvenanceRange(garbage, p),
        "excess characters after path name"_en_US);
  }
  std::string buf;
  llvm::raw_string_ostream error{buf};
  Provenance provenance{GetProvenance(nextLine_)};
  std::optional<std::string> prependPath;
  if (const SourceFile * currentFile{allSources_.GetSourceFile(provenance)}) {
    prependPath = DirectoryName(currentFile->path());
  }
  const SourceFile *included{
      allSources_.Open(path, error, std::move(prependPath))};
  if (!included) {
    Say(provenance, "INCLUDE: %s"_err_en_US, error.str());
    return;
  }
  if (included->bytes() > 0) {
    ProvenanceRange includeLineRange{
        provenance, static_cast<std::size_t>(p - nextLine_)};
    ProvenanceRange fileRange{
        allSources_.AddIncludedFile(*included, includeLineRange)};
    Prescanner{*this}.set_encoding(included->encoding()).Prescan(fileRange);
  }
}
// If the line at `start` is a preprocessor directive line, returns a
// pointer to the first non-blank, non-tab character after its '#';
// otherwise returns nullptr.  In fixed form, a '#' in column 6 preceded
// only by blanks is not a directive.
const char *Prescanner::IsPreprocessorDirectiveLine(const char *start) const {
  const char *hash{start};
  while (*hash == ' ') {
    ++hash;
  }
  if (*hash != '#') {
    // Tabs may also precede the '#'.
    hash = SkipWhiteSpace(hash);
    if (*hash != '#') {
      return nullptr;
    }
  } else if (inFixedForm_ && hash == start + 5) {
    return nullptr;
  }
  return SkipWhiteSpace(hash + 1);
}
// True when the line at nextLine_ is a preprocessor directive line.
bool Prescanner::IsNextLinePreprocessorDirective() const {
  const char *directive{IsPreprocessorDirectiveLine(nextLine_)};
  return directive != nullptr;
}
bool Prescanner::SkipCommentLine(bool afterAmpersand) {
if (IsAtEnd()) {
if (afterAmpersand && prescannerNesting_ > 0) {
// A continuation marker at the end of the last line in an
// include file inhibits the newline for that line.
SkipToEndOfLine();
omitNewline_ = true;
}
return false;
}
auto lineClass{ClassifyLine(nextLine_)};
if (lineClass.kind == LineClassification::Kind::Comment) {
NextLine();
return true;
} else if (inPreprocessorDirective_) {
return false;
} else if (lineClass.kind ==
LineClassification::Kind::ConditionalCompilationDirective ||
lineClass.kind == LineClassification::Kind::PreprocessorDirective) {
// Allow conditional compilation directives (e.g., #ifdef) to affect
// continuation lines.
// Allow other preprocessor directives, too, except #include
// (when it does not follow '&'), #define, and #undef (because
// they cannot be allowed to affect preceding text on a
// continued line).
preprocessor_.Directive(TokenizePreprocessorDirective(), *this);
return true;
} else if (afterAmpersand &&
(lineClass.kind == LineClassification::Kind::IncludeDirective ||
lineClass.kind == LineClassification::Kind::IncludeLine)) {
SkipToEndOfLine();
omitNewline_ = true;
skipLeadingAmpersand_ = true;
return false;
} else {
return false;
}
}
// Determines whether the line at nextLine_ is a fixed-form continuation
// line.  Returns a pointer to the point in that line at which scanning
// should resume, or nullptr when it is not a continuation line.
// Clears tabInCurrentLine_ (unless at the end of the source) and may set
// insertASpace_ for a continued compiler directive when `mightNeedSpace`.
const char *Prescanner::FixedFormContinuationLine(bool mightNeedSpace) {
if (IsAtEnd()) {
return nullptr;
}
tabInCurrentLine_ = false;
char col1{*nextLine_};
if (InCompilerDirective()) {
// Must be a continued compiler directive.
if (!IsFixedFormCommentChar(col1)) {
return nullptr;
}
// Columns 2 through 5 must repeat the current directive sentinel
// (compared against lower-cased source text), blank-padded.
int j{1};
for (; j < 5; ++j) {
char ch{directiveSentinel_[j - 1]};
if (ch == '\0') {
break;
}
if (ch != ToLowerCaseLetter(nextLine_[j])) {
return nullptr;
}
}
for (; j < 5; ++j) {
if (nextLine_[j] != ' ') {
return nullptr;
}
}
// Column 6 must hold a continuation marker: anything other than a
// newline, tab, blank, or '0'.
char col6{nextLine_[5]};
if (col6 != '\n' && col6 != '\t' && col6 != ' ' && col6 != '0') {
if (nextLine_[6] != ' ' && mightNeedSpace) {
insertASpace_ = true;
}
return nextLine_ + 6;
}
return nullptr;
} else {
// Normal case: not in a compiler directive.
if (col1 == '&' &&
features_.IsEnabled(
LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) {
// Extension: '&' as continuation marker
if (features_.ShouldWarn(
LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) {
Say(GetProvenance(nextLine_), "nonstandard usage"_en_US);
}
return nextLine_ + 1;
}
if (col1 == '\t' && nextLine_[1] >= '1' && nextLine_[1] <= '9') {
tabInCurrentLine_ = true;
return nextLine_ + 2; // VAX extension
}
// Standard form: five blanks followed by a continuation marker in
// column 6.
if (col1 == ' ' && nextLine_[1] == ' ' && nextLine_[2] == ' ' &&
nextLine_[3] == ' ' && nextLine_[4] == ' ') {
char col6{nextLine_[5]};
if (col6 != '\n' && col6 != '\t' && col6 != ' ' && col6 != '0') {
return nextLine_ + 6;
}
}
if (IsImplicitContinuation()) {
return nextLine_;
}
}
return nullptr; // not a continuation line
}
// Determines whether the line at nextLine_ continues the current
// free-form line.  `ampersand` tells whether the current line ended with
// a '&'.  Returns a pointer to the point in the next line at which
// scanning should resume, or nullptr when it is not a continuation line.
// May set insertASpace_.
const char *Prescanner::FreeFormContinuationLine(bool ampersand) {
const char *p{nextLine_};
if (p >= limit_) {
return nullptr;
}
p = SkipWhiteSpace(p);
if (InCompilerDirective()) {
// A continued compiler directive must start with '!' and the same
// sentinel (compared against lower-cased source text).
if (*p++ != '!') {
return nullptr;
}
for (const char *s{directiveSentinel_}; *s != '\0'; ++p, ++s) {
if (*s != ToLowerCaseLetter(*p)) {
return nullptr;
}
}
p = SkipWhiteSpace(p);
if (*p == '&') {
if (!ampersand) {
insertASpace_ = true;
}
return p + 1;
} else if (ampersand) {
return p;
} else {
return nullptr;
}
} else {
if (*p == '&') {
return p + 1;
} else if (*p == '!' || *p == '\n' || *p == '#') {
return nullptr;
} else if (ampersand || IsImplicitContinuation()) {
// No leading '&' on the continuation line: resume one character before
// the first significant character when there is leading white space,
// or else request that a space be inserted.
if (p > nextLine_) {
--p;
} else {
insertASpace_ = true;
}
return p;
} else {
return nullptr;
}
}
}
// Tries to move on to a fixed-form continuation line, skipping over any
// comment lines (and the like) that come first.  On success, scanning
// resumes in column 7 of the continuation line and true is returned.
bool Prescanner::FixedFormContinuation(bool mightNeedSpace) {
  // N.B. We accept '&' as a continuation indicator in fixed form, too,
  // but not in a character literal.
  if (*at_ == '&' && inCharLiteral_) {
    return false;
  }
  for (;;) {
    if (const char *cont{FixedFormContinuationLine(mightNeedSpace)}) {
      BeginSourceLine(cont);
      column_ = 7;
      NextLine();
      return true;
    }
    if (!SkipCommentLine(false /* not after ampersand */)) {
      return false;
    }
  }
}
// Tries to move on to a free-form continuation line, skipping over any
// comment lines (and the like) that come first.  Returns true when
// scanning has resumed on a continuation line.
bool Prescanner::FreeFormContinuation() {
  const bool ampersand{*at_ == '&'};
  const char *p{ampersand ? SkipWhiteSpace(at_ + 1) : at_};
  if (*p != '\n') {
    // Something other than white space lies between here and the end of
    // the line.
    if (inCharLiteral_) {
      return false;
    }
    if (*p != '!' &&
        features_.ShouldWarn(LanguageFeature::CruftAfterAmpersand)) {
      Say(GetProvenance(p), "missing ! before comment after &"_en_US);
    }
  }
  for (;;) {
    if (const char *cont{FreeFormContinuationLine(ampersand)}) {
      BeginSourceLine(cont);
      NextLine();
      return true;
    }
    if (!SkipCommentLine(ampersand)) {
      return false;
    }
  }
}
// Implicit continuation lets a preprocessor macro call with arguments
// span several lines: outside of directives and character literals, a
// line with unclosed parentheses or brackets is continued by a following
// ordinary source line.
bool Prescanner::IsImplicitContinuation() const {
  if (inPreprocessorDirective_ || inCharLiteral_) {
    return false;
  }
  if (!(delimiterNesting_ > 0)) {
    return false;
  }
  if (IsAtEnd()) {
    return false;
  }
  return ClassifyLine(nextLine_).kind == LineClassification::Kind::Source;
}
// When at_ is at a newline or a '&', tries to continue onto the next
// line according to the current source form; returns whether it did.
bool Prescanner::Continuation(bool mightNeedFixedFormSpace) {
  if (*at_ != '\n' && *at_ != '&') {
    return false;
  }
  return inFixedForm_ ? FixedFormContinuation(mightNeedFixedFormSpace)
                      : FreeFormContinuation();
}
// Determines whether the fixed-form line at `start` is the initial line
// of a compiler directive: a comment character in column 1 followed in
// columns 2 through 5 by a registered sentinel.  On success, returns a
// CompilerDirective classification with the offset of the payload and
// the registered sentinel string.
std::optional<Prescanner::LineClassification>
Prescanner::IsFixedFormCompilerDirectiveLine(const char *start) const {
const char *p{start};
char col1{*p++};
if (!IsFixedFormCommentChar(col1)) {
return std::nullopt;
}
// Gather up to four lower-cased non-blank characters from columns 2
// through 5; the fifth element is for the terminating NUL.
char sentinel[5], *sp{sentinel};
int column{2};
for (; column < 6; ++column, ++p) {
if (*p != ' ') {
if (*p == '\n' || *p == '\t') {
break;
}
if (sp == sentinel + 1 && sentinel[0] == '$' && IsDecimalDigit(*p)) {
// OpenMP conditional compilation line: leave the label alone
break;
}
*sp++ = ToLowerCaseLetter(*p);
}
}
if (column == 6) {
if (*p == ' ' || *p == '\t' || *p == '0') {
++p;
} else {
// This is a Continuation line, not an initial directive line.
return std::nullopt;
}
}
if (sp == sentinel) {
return std::nullopt;
}
*sp = '\0';
if (const char *ss{IsCompilerDirectiveSentinel(sentinel)}) {
std::size_t payloadOffset = p - start;
return {LineClassification{
LineClassification::Kind::CompilerDirective, payloadOffset, ss}};
}
return std::nullopt;
}
// Determines whether the free-form line at `start` is a compiler
// directive: optional white space, a '!', and then a registered sentinel
// that is terminated by a blank, a tab, or a '&'.  On success, returns a
// CompilerDirective classification with the offset of the first
// non-blank character after the sentinel and the registered sentinel
// string.
std::optional<Prescanner::LineClassification>
Prescanner::IsFreeFormCompilerDirectiveLine(const char *start) const {
// Holds up to seven lower-cased sentinel characters and a NUL.
char sentinel[8];
const char *p{SkipWhiteSpace(start)};
if (*p++ != '!') {
return std::nullopt;
}
for (std::size_t j{0}; j + 1 < sizeof sentinel; ++p, ++j) {
if (*p == '\n') {
break;
}
if (*p == ' ' || *p == '\t' || *p == '&') {
if (j == 0) {
break;
}
sentinel[j] = '\0';
p = SkipWhiteSpace(p + 1);
// Nothing but a comment follows the candidate sentinel: not a directive.
if (*p == '!') {
break;
}
if (const char *sp{IsCompilerDirectiveSentinel(sentinel)}) {
std::size_t offset = p - start;
return {LineClassification{
LineClassification::Kind::CompilerDirective, offset, sp}};
}
break;
}
sentinel[j] = ToLowerCaseLetter(*p);
}
return std::nullopt;
}
// Registers `dir` as a compiler directive sentinel and returns *this so
// that calls can be chained.
// The sentinel is stored in lower case.  IsCompilerDirectiveSentinel() is
// given lower-cased text, and the continuation line checks compare the
// stored sentinel against lower-cased source characters, so a sentinel
// stored with upper-case letters would pass the Bloom filter (whose hash
// is computed from lower-cased characters) yet never be found in the set.
Prescanner &Prescanner::AddCompilerDirectiveSentinel(const std::string &dir) {
  std::string lowered;
  lowered.reserve(dir.size());
  std::uint64_t packed{0};
  for (char ch : dir) {
    const char lower{ToLowerCaseLetter(ch)};
    lowered.push_back(lower);
    // Same packing as in IsCompilerDirectiveSentinel().
    packed = (packed << 8) | (lower & 0xff);
  }
  compilerDirectiveBloomFilter_.set(packed % prime1);
  compilerDirectiveBloomFilter_.set(packed % prime2);
  compilerDirectiveSentinels_.insert(std::move(lowered));
  return *this;
}
// Looks up the NUL-terminated string `sentinel` among the registered
// compiler directive sentinels.  Returns a pointer to the registered
// copy of the string, or nullptr when it is empty or not registered.
// The Bloom filter is consulted first to reject most non-sentinels
// without a set lookup.
const char *Prescanner::IsCompilerDirectiveSentinel(
    const char *sentinel) const {
  std::uint64_t packed{0};
  std::size_t length{0};
  while (sentinel[length] != '\0') {
    packed = (packed << 8) | (sentinel[length] & 0xff);
    ++length;
  }
  if (length == 0) {
    return nullptr;
  }
  if (!compilerDirectiveBloomFilter_.test(packed % prime1) ||
      !compilerDirectiveBloomFilter_.test(packed % prime2)) {
    return nullptr;
  }
  const auto iter{
      compilerDirectiveSentinels_.find(std::string(sentinel, length))};
  if (iter == compilerDirectiveSentinels_.end()) {
    return nullptr;
  }
  return iter->c_str();
}
// True when the text at `dir`, lower-cased, begins with the
// NUL-terminated string `match`.  Only a prefix is compared, so "if"
// also matches text such as "ifdef".
constexpr bool IsDirective(const char *match, const char *dir) {
  for (std::size_t j{0}; match[j] != '\0'; ++j) {
    if (ToLowerCaseLetter(dir[j]) != match[j]) {
      return false;
    }
  }
  return true;
}
// Classifies the line at `start`.  Checks are made in this order:
// compiler directive, comment (both according to the current source
// form), INCLUDE line, preprocessor directive; anything else is an
// ordinary source line.
Prescanner::LineClassification Prescanner::ClassifyLine(
    const char *start) const {
  using Kind = LineClassification::Kind;
  if (inFixedForm_) {
    if (auto directive{IsFixedFormCompilerDirectiveLine(start)}) {
      return std::move(*directive);
    }
    if (IsFixedFormCommentLine(start)) {
      return {Kind::Comment};
    }
  } else {
    if (auto directive{IsFreeFormCompilerDirectiveLine(start)}) {
      return std::move(*directive);
    }
    if (const char *bang{IsFreeFormComment(start)}) {
      return {Kind::Comment, static_cast<std::size_t>(bang - start)};
    }
  }
  if (const auto quoteOffset{IsIncludeLine(start)}) {
    return {Kind::IncludeLine, *quoteOffset};
  }
  const char *dir{IsPreprocessorDirectiveLine(start)};
  if (!dir) {
    return {Kind::Source};
  }
  // Directive names are matched as prefixes of the text after the '#'.
  Kind kind{Kind::PreprocessorDirective};
  if (IsDirective("if", dir) || IsDirective("elif", dir) ||
      IsDirective("else", dir) || IsDirective("endif", dir)) {
    kind = Kind::ConditionalCompilationDirective;
  } else if (IsDirective("include", dir)) {
    kind = Kind::IncludeDirective;
  } else if (IsDirective("define", dir) || IsDirective("undef", dir)) {
    kind = Kind::DefinitionDirective;
  }
  return {kind};
}
// Switches the source form when `dir` is exactly "!dir$ free" or
// "!dir$ fixed"; any other text leaves the form unchanged.
void Prescanner::SourceFormChange(std::string &&dir) {
  const bool toFreeForm{dir == "!dir$ free"};
  if (toFreeForm || dir == "!dir$ fixed") {
    inFixedForm_ = !toFreeForm;
  }
}
} // namespace Fortran::parser