//===-- lib/Parser/prescan.cpp --------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "prescan.h" #include "preprocessor.h" #include "token-sequence.h" #include "flang/Common/idioms.h" #include "flang/Parser/characters.h" #include "flang/Parser/message.h" #include "flang/Parser/source.h" #include "llvm/Support/raw_ostream.h" #include #include #include #include namespace Fortran::parser { using common::LanguageFeature; static constexpr int maxPrescannerNesting{100}; Prescanner::Prescanner(Messages &messages, CookedSource &cooked, Preprocessor &preprocessor, common::LanguageFeatureControl lfc) : messages_{messages}, cooked_{cooked}, preprocessor_{preprocessor}, allSources_{preprocessor_.allSources()}, features_{lfc}, encoding_{allSources_.encoding()} {} Prescanner::Prescanner(const Prescanner &that) : messages_{that.messages_}, cooked_{that.cooked_}, preprocessor_{that.preprocessor_}, allSources_{that.allSources_}, features_{that.features_}, inFixedForm_{that.inFixedForm_}, fixedFormColumnLimit_{that.fixedFormColumnLimit_}, encoding_{that.encoding_}, prescannerNesting_{that.prescannerNesting_ + 1}, skipLeadingAmpersand_{that.skipLeadingAmpersand_}, compilerDirectiveBloomFilter_{that.compilerDirectiveBloomFilter_}, compilerDirectiveSentinels_{that.compilerDirectiveSentinels_} {} static inline constexpr bool IsFixedFormCommentChar(char ch) { return ch == '!' || ch == '*' || ch == 'C' || ch == 'c'; } static void NormalizeCompilerDirectiveCommentMarker(TokenSequence &dir) { char *p{dir.GetMutableCharData()}; char *limit{p + dir.SizeInChars()}; for (; p < limit; ++p) { if (*p != ' ') { CHECK(IsFixedFormCommentChar(*p)); *p = '!'; return; } } DIE("compiler directive all blank"); } void Prescanner::Prescan(ProvenanceRange range) { startProvenance_ = range.start(); start_ = allSources_.GetSource(range); CHECK(start_); limit_ = start_ + range.size(); nextLine_ = start_; const bool beganInFixedForm{inFixedForm_}; if (prescannerNesting_ > maxPrescannerNesting) { Say(GetProvenance(start_), "too many nested INCLUDE/#include files, possibly circular"_err_en_US); return; } while (!IsAtEnd()) { Statement(); } if (inFixedForm_ != beganInFixedForm) { std::string dir{"!dir$ "}; if (beganInFixedForm) { dir += "fixed"; } else { dir += "free"; } dir += '\n'; TokenSequence tokens{dir, allSources_.AddCompilerInsertion(dir).start()}; tokens.Emit(cooked_); } } void Prescanner::Statement() { TokenSequence tokens; LineClassification line{ClassifyLine(nextLine_)}; switch (line.kind) { case LineClassification::Kind::Comment: nextLine_ += line.payloadOffset; // advance to '!' or newline NextLine(); return; case LineClassification::Kind::IncludeLine: FortranInclude(nextLine_ + line.payloadOffset); NextLine(); return; case LineClassification::Kind::ConditionalCompilationDirective: case LineClassification::Kind::IncludeDirective: case LineClassification::Kind::DefinitionDirective: case LineClassification::Kind::PreprocessorDirective: preprocessor_.Directive(TokenizePreprocessorDirective(), *this); return; case LineClassification::Kind::CompilerDirective: directiveSentinel_ = line.sentinel; CHECK(InCompilerDirective()); BeginStatementAndAdvance(); if (inFixedForm_) { CHECK(IsFixedFormCommentChar(*at_)); } else { while (*at_ == ' ' || *at_ == '\t') { ++at_, ++column_; } CHECK(*at_ == '!'); } if (directiveSentinel_[0] == '$' && directiveSentinel_[1] == '\0') { // OpenMP conditional compilation line. Remove the sentinel and then // treat the line as if it were normal source. at_ += 2, column_ += 2; if (inFixedForm_) { LabelField(tokens); } else { SkipSpaces(); } } else { // Compiler directive. Emit normalized sentinel. EmitChar(tokens, '!'); ++at_, ++column_; for (const char *sp{directiveSentinel_}; *sp != '\0'; ++sp, ++at_, ++column_) { EmitChar(tokens, *sp); } if (*at_ == ' ') { EmitChar(tokens, ' '); ++at_, ++column_; } tokens.CloseToken(); } break; case LineClassification::Kind::Source: BeginStatementAndAdvance(); if (inFixedForm_) { LabelField(tokens); } else if (skipLeadingAmpersand_) { skipLeadingAmpersand_ = false; const char *p{SkipWhiteSpace(at_)}; if (p < limit_ && *p == '&') { column_ += ++p - at_; at_ = p; } } else { SkipSpaces(); } break; } while (NextToken(tokens)) { } Provenance newlineProvenance{GetCurrentProvenance()}; if (std::optional preprocessed{ preprocessor_.MacroReplacement(tokens, *this)}) { // Reprocess the preprocessed line. Append a newline temporarily. preprocessed->PutNextTokenChar('\n', newlineProvenance); preprocessed->CloseToken(); const char *ppd{preprocessed->ToCharBlock().begin()}; LineClassification ppl{ClassifyLine(ppd)}; preprocessed->pop_back(); // remove the newline switch (ppl.kind) { case LineClassification::Kind::Comment: break; case LineClassification::Kind::IncludeLine: FortranInclude(ppd + ppl.payloadOffset); break; case LineClassification::Kind::ConditionalCompilationDirective: case LineClassification::Kind::IncludeDirective: case LineClassification::Kind::DefinitionDirective: case LineClassification::Kind::PreprocessorDirective: Say(preprocessed->GetProvenanceRange(), "Preprocessed line resembles a preprocessor directive"_en_US); preprocessed->ToLowerCase() .CheckBadFortranCharacters(messages_) .CheckBadParentheses(messages_) .Emit(cooked_); break; case LineClassification::Kind::CompilerDirective: if (preprocessed->HasRedundantBlanks()) { preprocessed->RemoveRedundantBlanks(); } NormalizeCompilerDirectiveCommentMarker(*preprocessed); preprocessed->ToLowerCase(); SourceFormChange(preprocessed->ToString()); preprocessed->ClipComment(true /* skip first ! */) .CheckBadFortranCharacters(messages_) .CheckBadParentheses(messages_) .Emit(cooked_); break; case LineClassification::Kind::Source: if (inFixedForm_) { if (preprocessed->HasBlanks(/*after column*/ 6)) { preprocessed->RemoveBlanks(/*after column*/ 6); } } else { if (preprocessed->HasRedundantBlanks()) { preprocessed->RemoveRedundantBlanks(); } } preprocessed->ToLowerCase() .ClipComment() .CheckBadFortranCharacters(messages_) .CheckBadParentheses(messages_) .Emit(cooked_); break; } } else { tokens.ToLowerCase(); if (line.kind == LineClassification::Kind::CompilerDirective) { SourceFormChange(tokens.ToString()); } if (inFixedForm_ && line.kind == LineClassification::Kind::Source) { EnforceStupidEndStatementRules(tokens); } tokens.CheckBadFortranCharacters(messages_) .CheckBadParentheses(messages_) .Emit(cooked_); } if (omitNewline_) { omitNewline_ = false; } else { cooked_.Put('\n', newlineProvenance); } directiveSentinel_ = nullptr; } TokenSequence Prescanner::TokenizePreprocessorDirective() { CHECK(!IsAtEnd() && !inPreprocessorDirective_); inPreprocessorDirective_ = true; BeginStatementAndAdvance(); TokenSequence tokens; while (NextToken(tokens)) { } inPreprocessorDirective_ = false; return tokens; } void Prescanner::NextLine() { void *vstart{static_cast(const_cast(nextLine_))}; void *v{std::memchr(vstart, '\n', limit_ - nextLine_)}; if (!v) { nextLine_ = limit_; } else { const char *nl{const_cast(static_cast(v))}; nextLine_ = nl + 1; } } void Prescanner::LabelField(TokenSequence &token) { const char *bad{nullptr}; int outCol{1}; const char *start{at_}; for (; *at_ != '\n' && column_ <= 6; ++at_) { if (*at_ == '\t') { ++at_; column_ = 7; break; } if (*at_ != ' ' && !(*at_ == '0' && column_ == 6)) { // '0' in column 6 becomes space EmitChar(token, *at_); ++outCol; if (!bad && !IsDecimalDigit(*at_)) { bad = at_; } } ++column_; } if (bad && !preprocessor_.IsNameDefined(token.CurrentOpenToken())) { Say(GetProvenance(bad), "Character in fixed-form label field must be a digit"_en_US); token.clear(); at_ = start; return; } if (outCol == 1) { // empty label field // Emit a space so that, if the line is rescanned after preprocessing, // a leading 'C' or 'D' won't be left-justified and then accidentally // misinterpreted as a comment card. EmitChar(token, ' '); ++outCol; } token.CloseToken(); SkipToNextSignificantCharacter(); if (IsDecimalDigit(*at_)) { Say(GetProvenance(at_), "Label digit is not in fixed-form label field"_en_US); } } // 6.3.3.5: A program unit END statement, or any other statement whose // initial line resembles an END statement, shall not be continued in // fixed form source. void Prescanner::EnforceStupidEndStatementRules(const TokenSequence &tokens) { CharBlock cBlock{tokens.ToCharBlock()}; const char *str{cBlock.begin()}; std::size_t n{cBlock.size()}; if (n < 3) { return; } std::size_t j{0}; for (; j < n && (str[j] == ' ' || (str[j] >= '0' && str[j] <= '9')); ++j) { } if (j + 3 > n || std::memcmp(str + j, "end", 3) != 0) { return; } // It starts with END, possibly after a label. auto start{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))}; auto end{allSources_.GetSourcePosition(tokens.GetCharProvenance(n - 1))}; if (!start || !end) { return; } if (&start->file == &end->file && start->line == end->line) { return; // no continuation } j += 3; static const char *const prefixes[]{"program", "subroutine", "function", "blockdata", "module", "submodule", nullptr}; bool isPrefix{j == n || !IsLegalInIdentifier(str[j])}; // prefix is END std::size_t endOfPrefix{j - 1}; for (const char *const *p{prefixes}; *p; ++p) { std::size_t pLen{std::strlen(*p)}; if (j + pLen <= n && std::memcmp(str + j, *p, pLen) == 0) { isPrefix = true; // END thing as prefix j += pLen; endOfPrefix = j - 1; for (; j < n && IsLegalInIdentifier(str[j]); ++j) { } break; } } if (isPrefix) { auto range{tokens.GetTokenProvenanceRange(1)}; if (j == n) { // END or END thing [name] Say(range, "Program unit END statement may not be continued in fixed form source"_err_en_US); } else { auto endOfPrefixPos{ allSources_.GetSourcePosition(tokens.GetCharProvenance(endOfPrefix))}; auto next{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))}; if (endOfPrefixPos && next && &endOfPrefixPos->file == &start->file && endOfPrefixPos->line == start->line && (&next->file != &start->file || next->line != start->line)) { Say(range, "Initial line of continued statement must not appear to be a program unit END in fixed form source"_err_en_US); } } } } void Prescanner::SkipToEndOfLine() { while (*at_ != '\n') { ++at_, ++column_; } } bool Prescanner::MustSkipToEndOfLine() const { if (inFixedForm_ && column_ > fixedFormColumnLimit_ && !tabInCurrentLine_) { return true; // skip over ignored columns in right margin (73:80) } else if (*at_ == '!' && !inCharLiteral_) { return true; // inline comment goes to end of source line } else { return false; } } void Prescanner::NextChar() { CHECK(*at_ != '\n'); ++at_, ++column_; while (at_[0] == '\xef' && at_[1] == '\xbb' && at_[2] == '\xbf') { // UTF-8 byte order mark - treat this file as UTF-8 at_ += 3; encoding_ = Encoding::UTF_8; } SkipToNextSignificantCharacter(); } // Skip everything that should be ignored until the next significant // character is reached; handles C-style comments in preprocessing // directives, Fortran ! comments, stuff after the right margin in // fixed form, and all forms of line continuation. void Prescanner::SkipToNextSignificantCharacter() { if (inPreprocessorDirective_) { SkipCComments(); } else { bool mightNeedSpace{false}; if (MustSkipToEndOfLine()) { SkipToEndOfLine(); } else { mightNeedSpace = *at_ == '\n'; } for (; Continuation(mightNeedSpace); mightNeedSpace = false) { if (MustSkipToEndOfLine()) { SkipToEndOfLine(); } } if (*at_ == '\t') { tabInCurrentLine_ = true; } } } void Prescanner::SkipCComments() { while (true) { if (IsCComment(at_)) { if (const char *after{SkipCComment(at_)}) { column_ += after - at_; // May have skipped over one or more newlines; relocate the start of // the next line. nextLine_ = at_ = after; NextLine(); } else { // Don't emit any messages about unclosed C-style comments, because // the sequence /* can appear legally in a FORMAT statement. There's // no ambiguity, since the sequence */ cannot appear legally. break; } } else if (inPreprocessorDirective_ && at_[0] == '\\' && at_ + 2 < limit_ && at_[1] == '\n' && !IsAtEnd()) { BeginSourceLineAndAdvance(); } else { break; } } } void Prescanner::SkipSpaces() { while (*at_ == ' ' || *at_ == '\t') { NextChar(); } insertASpace_ = false; } const char *Prescanner::SkipWhiteSpace(const char *p) { while (*p == ' ' || *p == '\t') { ++p; } return p; } const char *Prescanner::SkipWhiteSpaceAndCComments(const char *p) const { while (true) { if (*p == ' ' || *p == '\t') { ++p; } else if (IsCComment(p)) { if (const char *after{SkipCComment(p)}) { p = after; } else { break; } } else { break; } } return p; } const char *Prescanner::SkipCComment(const char *p) const { char star{' '}, slash{' '}; p += 2; while (star != '*' || slash != '/') { if (p >= limit_) { return nullptr; // signifies an unterminated comment } star = slash; slash = *p++; } return p; } bool Prescanner::NextToken(TokenSequence &tokens) { CHECK(at_ >= start_ && at_ < limit_); if (InFixedFormSource()) { SkipSpaces(); } else { if (*at_ == '/' && IsCComment(at_)) { // Recognize and skip over classic C style /*comments*/ when // outside a character literal. if (features_.ShouldWarn(LanguageFeature::ClassicCComments)) { Say(GetProvenance(at_), "nonstandard usage: C-style comment"_en_US); } SkipCComments(); } if (*at_ == ' ' || *at_ == '\t') { // Compress free-form white space into a single space character. const auto theSpace{at_}; char previous{at_ <= start_ ? ' ' : at_[-1]}; NextChar(); SkipSpaces(); if (*at_ == '\n') { // Discard white space at the end of a line. } else if (!inPreprocessorDirective_ && (previous == '(' || *at_ == '(' || *at_ == ')')) { // Discard white space before/after '(' and before ')', unless in a // preprocessor directive. This helps yield space-free contiguous // names for generic interfaces like OPERATOR( + ) and // READ ( UNFORMATTED ), without misinterpreting #define f (notAnArg). // This has the effect of silently ignoring the illegal spaces in // the array constructor ( /1,2/ ) but that seems benign; it's // hard to avoid that while still removing spaces from OPERATOR( / ) // and OPERATOR( // ). } else { // Preserve the squashed white space as a single space character. tokens.PutNextTokenChar(' ', GetProvenance(theSpace)); tokens.CloseToken(); return true; } } } if (insertASpace_) { tokens.PutNextTokenChar(' ', spaceProvenance_); insertASpace_ = false; } if (*at_ == '\n') { return false; } const char *start{at_}; if (*at_ == '\'' || *at_ == '"') { QuotedCharacterLiteral(tokens, start); preventHollerith_ = false; } else if (IsDecimalDigit(*at_)) { int n{0}, digits{0}; static constexpr int maxHollerith{256 /*lines*/ * (132 - 6 /*columns*/)}; do { if (n < maxHollerith) { n = 10 * n + DecimalDigitValue(*at_); } EmitCharAndAdvance(tokens, *at_); ++digits; if (InFixedFormSource()) { SkipSpaces(); } } while (IsDecimalDigit(*at_)); if ((*at_ == 'h' || *at_ == 'H') && n > 0 && n < maxHollerith && !preventHollerith_) { Hollerith(tokens, n, start); } else if (*at_ == '.') { while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) { } ExponentAndKind(tokens); } else if (ExponentAndKind(tokens)) { } else if (digits == 1 && n == 0 && (*at_ == 'x' || *at_ == 'X') && inPreprocessorDirective_) { do { EmitCharAndAdvance(tokens, *at_); } while (IsHexadecimalDigit(*at_)); } else if (IsLetter(*at_)) { // Handles FORMAT(3I9HHOLLERITH) by skipping over the first I so that // we don't misrecognize I9HOLLERITH as an identifier in the next case. EmitCharAndAdvance(tokens, *at_); } else if (at_[0] == '_' && (at_[1] == '\'' || at_[1] == '"')) { // 4_"..." EmitCharAndAdvance(tokens, *at_); QuotedCharacterLiteral(tokens, start); } preventHollerith_ = false; } else if (*at_ == '.') { char nch{EmitCharAndAdvance(tokens, '.')}; if (!inPreprocessorDirective_ && IsDecimalDigit(nch)) { while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) { } ExponentAndKind(tokens); } else if (nch == '.' && EmitCharAndAdvance(tokens, '.') == '.') { EmitCharAndAdvance(tokens, '.'); // variadic macro definition ellipsis } preventHollerith_ = false; } else if (IsLegalInIdentifier(*at_)) { do { } while (IsLegalInIdentifier(EmitCharAndAdvance(tokens, *at_))); if ((*at_ == '\'' || *at_ == '"') && tokens.CharAt(tokens.SizeInChars() - 1) == '_') { // kind_"..." QuotedCharacterLiteral(tokens, start); } preventHollerith_ = false; } else if (*at_ == '*') { if (EmitCharAndAdvance(tokens, '*') == '*') { EmitCharAndAdvance(tokens, '*'); } else { // Subtle ambiguity: // CHARACTER*2H declares H because *2 is a kind specifier // DATAC/N*2H / is repeated Hollerith preventHollerith_ = !slashInCurrentStatement_; } } else { char ch{*at_}; if (ch == '(' || ch == '[') { ++delimiterNesting_; } else if ((ch == ')' || ch == ']') && delimiterNesting_ > 0) { --delimiterNesting_; } char nch{EmitCharAndAdvance(tokens, ch)}; preventHollerith_ = false; if ((nch == '=' && (ch == '<' || ch == '>' || ch == '/' || ch == '=' || ch == '!')) || (ch == nch && (ch == '/' || ch == ':' || ch == '*' || ch == '#' || ch == '&' || ch == '|' || ch == '<' || ch == '>')) || (ch == '=' && nch == '>')) { // token comprises two characters EmitCharAndAdvance(tokens, nch); } else if (ch == '/') { slashInCurrentStatement_ = true; } } tokens.CloseToken(); return true; } bool Prescanner::ExponentAndKind(TokenSequence &tokens) { char ed{ToLowerCaseLetter(*at_)}; if (ed != 'e' && ed != 'd') { return false; } EmitCharAndAdvance(tokens, ed); if (*at_ == '+' || *at_ == '-') { EmitCharAndAdvance(tokens, *at_); } while (IsDecimalDigit(*at_)) { EmitCharAndAdvance(tokens, *at_); } if (*at_ == '_') { while (IsLegalInIdentifier(EmitCharAndAdvance(tokens, *at_))) { } } return true; } void Prescanner::QuotedCharacterLiteral( TokenSequence &tokens, const char *start) { char quote{*at_}; const char *end{at_ + 1}; inCharLiteral_ = true; const auto emit{[&](char ch) { EmitChar(tokens, ch); }}; const auto insert{[&](char ch) { EmitInsertedChar(tokens, ch); }}; bool isEscaped{false}; bool escapesEnabled{features_.IsEnabled(LanguageFeature::BackslashEscapes)}; while (true) { if (*at_ == '\\') { if (escapesEnabled) { isEscaped = !isEscaped; } else { // The parser always processes escape sequences, so don't confuse it // when escapes are disabled. insert('\\'); } } else { isEscaped = false; } EmitQuotedChar(static_cast(*at_), emit, insert, false, Encoding::LATIN_1); while (PadOutCharacterLiteral(tokens)) { } if (*at_ == '\n') { if (!inPreprocessorDirective_) { Say(GetProvenanceRange(start, end), "Incomplete character literal"_err_en_US); } break; } end = at_ + 1; NextChar(); if (*at_ == quote && !isEscaped) { // A doubled unescaped quote mark becomes a single instance of that // quote character in the literal (later). There can be spaces between // the quotes in fixed form source. EmitChar(tokens, quote); inCharLiteral_ = false; // for cases like print *, '...'!comment NextChar(); if (InFixedFormSource()) { SkipSpaces(); } if (*at_ != quote) { break; } inCharLiteral_ = true; } } inCharLiteral_ = false; } void Prescanner::Hollerith( TokenSequence &tokens, int count, const char *start) { inCharLiteral_ = true; CHECK(*at_ == 'h' || *at_ == 'H'); EmitChar(tokens, 'H'); while (count-- > 0) { if (PadOutCharacterLiteral(tokens)) { } else if (*at_ == '\n') { Say(GetProvenanceRange(start, at_), "Possible truncated Hollerith literal"_en_US); break; } else { NextChar(); // Each multi-byte character encoding counts as a single character. // No escape sequences are recognized. // Hollerith is always emitted to the cooked character // stream in UTF-8. DecodedCharacter decoded{DecodeCharacter( encoding_, at_, static_cast(limit_ - at_), false)}; if (decoded.bytes > 0) { EncodedCharacter utf8{ EncodeCharacter(decoded.codepoint)}; for (int j{0}; j < utf8.bytes; ++j) { EmitChar(tokens, utf8.buffer[j]); } at_ += decoded.bytes - 1; } else { Say(GetProvenanceRange(start, at_), "Bad character in Hollerith literal"_err_en_US); break; } } } if (*at_ != '\n') { NextChar(); } inCharLiteral_ = false; } // In fixed form, source card images must be processed as if they were at // least 72 columns wide, at least in character literal contexts. bool Prescanner::PadOutCharacterLiteral(TokenSequence &tokens) { while (inFixedForm_ && !tabInCurrentLine_ && at_[1] == '\n') { if (column_ < fixedFormColumnLimit_) { tokens.PutNextTokenChar(' ', spaceProvenance_); ++column_; return true; } if (!FixedFormContinuation(false /*no need to insert space*/) || tabInCurrentLine_) { return false; } CHECK(column_ == 7); --at_; // point to column 6 of continuation line column_ = 6; } return false; } bool Prescanner::IsFixedFormCommentLine(const char *start) const { const char *p{start}; if (IsFixedFormCommentChar(*p) || *p == '%' || // VAX %list, %eject, &c. ((*p == 'D' || *p == 'd') && !features_.IsEnabled(LanguageFeature::OldDebugLines))) { return true; } bool anyTabs{false}; while (true) { if (*p == ' ') { ++p; } else if (*p == '\t') { anyTabs = true; ++p; } else if (*p == '0' && !anyTabs && p == start + 5) { ++p; // 0 in column 6 must treated as a space } else { break; } } if (!anyTabs && p >= start + fixedFormColumnLimit_) { return true; } if (*p == '!' && !inCharLiteral_ && (anyTabs || p != start + 5)) { return true; } return *p == '\n'; } const char *Prescanner::IsFreeFormComment(const char *p) const { p = SkipWhiteSpaceAndCComments(p); if (*p == '!' || *p == '\n') { return p; } else { return nullptr; } } std::optional Prescanner::IsIncludeLine(const char *start) const { const char *p{SkipWhiteSpace(start)}; for (char ch : "include"s) { if (ToLowerCaseLetter(*p++) != ch) { return std::nullopt; } } p = SkipWhiteSpace(p); if (*p == '"' || *p == '\'') { return {p - start}; } return std::nullopt; } void Prescanner::FortranInclude(const char *firstQuote) { const char *p{firstQuote}; while (*p != '"' && *p != '\'') { ++p; } char quote{*p}; std::string path; for (++p; *p != '\n'; ++p) { if (*p == quote) { if (p[1] != quote) { break; } ++p; } path += *p; } if (*p != quote) { Say(GetProvenanceRange(firstQuote, p), "malformed path name string"_err_en_US); return; } p = SkipWhiteSpace(p + 1); if (*p != '\n' && *p != '!') { const char *garbage{p}; for (; *p != '\n' && *p != '!'; ++p) { } Say(GetProvenanceRange(garbage, p), "excess characters after path name"_en_US); } std::string buf; llvm::raw_string_ostream error{buf}; Provenance provenance{GetProvenance(nextLine_)}; std::optional prependPath; if (const SourceFile * currentFile{allSources_.GetSourceFile(provenance)}) { prependPath = DirectoryName(currentFile->path()); } const SourceFile *included{ allSources_.Open(path, error, std::move(prependPath))}; if (!included) { Say(provenance, "INCLUDE: %s"_err_en_US, error.str()); } else if (included->bytes() > 0) { ProvenanceRange includeLineRange{ provenance, static_cast(p - nextLine_)}; ProvenanceRange fileRange{ allSources_.AddIncludedFile(*included, includeLineRange)}; Prescanner{*this}.set_encoding(included->encoding()).Prescan(fileRange); } } const char *Prescanner::IsPreprocessorDirectiveLine(const char *start) const { const char *p{start}; for (; *p == ' '; ++p) { } if (*p == '#') { if (inFixedForm_ && p == start + 5) { return nullptr; } } else { p = SkipWhiteSpace(p); if (*p != '#') { return nullptr; } } return SkipWhiteSpace(p + 1); } bool Prescanner::IsNextLinePreprocessorDirective() const { return IsPreprocessorDirectiveLine(nextLine_) != nullptr; } bool Prescanner::SkipCommentLine(bool afterAmpersand) { if (IsAtEnd()) { if (afterAmpersand && prescannerNesting_ > 0) { // A continuation marker at the end of the last line in an // include file inhibits the newline for that line. SkipToEndOfLine(); omitNewline_ = true; } return false; } auto lineClass{ClassifyLine(nextLine_)}; if (lineClass.kind == LineClassification::Kind::Comment) { NextLine(); return true; } else if (inPreprocessorDirective_) { return false; } else if (lineClass.kind == LineClassification::Kind::ConditionalCompilationDirective || lineClass.kind == LineClassification::Kind::PreprocessorDirective) { // Allow conditional compilation directives (e.g., #ifdef) to affect // continuation lines. // Allow other preprocessor directives, too, except #include // (when it does not follow '&'), #define, and #undef (because // they cannot be allowed to affect preceding text on a // continued line). preprocessor_.Directive(TokenizePreprocessorDirective(), *this); return true; } else if (afterAmpersand && (lineClass.kind == LineClassification::Kind::IncludeDirective || lineClass.kind == LineClassification::Kind::IncludeLine)) { SkipToEndOfLine(); omitNewline_ = true; skipLeadingAmpersand_ = true; return false; } else { return false; } } const char *Prescanner::FixedFormContinuationLine(bool mightNeedSpace) { if (IsAtEnd()) { return nullptr; } tabInCurrentLine_ = false; char col1{*nextLine_}; if (InCompilerDirective()) { // Must be a continued compiler directive. if (!IsFixedFormCommentChar(col1)) { return nullptr; } int j{1}; for (; j < 5; ++j) { char ch{directiveSentinel_[j - 1]}; if (ch == '\0') { break; } if (ch != ToLowerCaseLetter(nextLine_[j])) { return nullptr; } } for (; j < 5; ++j) { if (nextLine_[j] != ' ') { return nullptr; } } char col6{nextLine_[5]}; if (col6 != '\n' && col6 != '\t' && col6 != ' ' && col6 != '0') { if (nextLine_[6] != ' ' && mightNeedSpace) { insertASpace_ = true; } return nextLine_ + 6; } return nullptr; } else { // Normal case: not in a compiler directive. if (col1 == '&' && features_.IsEnabled( LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) { // Extension: '&' as continuation marker if (features_.ShouldWarn( LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) { Say(GetProvenance(nextLine_), "nonstandard usage"_en_US); } return nextLine_ + 1; } if (col1 == '\t' && nextLine_[1] >= '1' && nextLine_[1] <= '9') { tabInCurrentLine_ = true; return nextLine_ + 2; // VAX extension } if (col1 == ' ' && nextLine_[1] == ' ' && nextLine_[2] == ' ' && nextLine_[3] == ' ' && nextLine_[4] == ' ') { char col6{nextLine_[5]}; if (col6 != '\n' && col6 != '\t' && col6 != ' ' && col6 != '0') { return nextLine_ + 6; } } if (IsImplicitContinuation()) { return nextLine_; } } return nullptr; // not a continuation line } const char *Prescanner::FreeFormContinuationLine(bool ampersand) { const char *p{nextLine_}; if (p >= limit_) { return nullptr; } p = SkipWhiteSpace(p); if (InCompilerDirective()) { if (*p++ != '!') { return nullptr; } for (const char *s{directiveSentinel_}; *s != '\0'; ++p, ++s) { if (*s != ToLowerCaseLetter(*p)) { return nullptr; } } p = SkipWhiteSpace(p); if (*p == '&') { if (!ampersand) { insertASpace_ = true; } return p + 1; } else if (ampersand) { return p; } else { return nullptr; } } else { if (*p == '&') { return p + 1; } else if (*p == '!' || *p == '\n' || *p == '#') { return nullptr; } else if (ampersand || IsImplicitContinuation()) { if (p > nextLine_) { --p; } else { insertASpace_ = true; } return p; } else { return nullptr; } } } bool Prescanner::FixedFormContinuation(bool mightNeedSpace) { // N.B. We accept '&' as a continuation indicator in fixed form, too, // but not in a character literal. if (*at_ == '&' && inCharLiteral_) { return false; } do { if (const char *cont{FixedFormContinuationLine(mightNeedSpace)}) { BeginSourceLine(cont); column_ = 7; NextLine(); return true; } } while (SkipCommentLine(false /* not after ampersand */)); return false; } bool Prescanner::FreeFormContinuation() { const char *p{at_}; bool ampersand{*p == '&'}; if (ampersand) { p = SkipWhiteSpace(p + 1); } if (*p != '\n') { if (inCharLiteral_) { return false; } else if (*p != '!' && features_.ShouldWarn(LanguageFeature::CruftAfterAmpersand)) { Say(GetProvenance(p), "missing ! before comment after &"_en_US); } } do { if (const char *cont{FreeFormContinuationLine(ampersand)}) { BeginSourceLine(cont); NextLine(); return true; } } while (SkipCommentLine(ampersand)); return false; } // Implicit line continuation allows a preprocessor macro call with // arguments to span multiple lines. bool Prescanner::IsImplicitContinuation() const { return !inPreprocessorDirective_ && !inCharLiteral_ && delimiterNesting_ > 0 && !IsAtEnd() && ClassifyLine(nextLine_).kind == LineClassification::Kind::Source; } bool Prescanner::Continuation(bool mightNeedFixedFormSpace) { if (*at_ == '\n' || *at_ == '&') { if (inFixedForm_) { return FixedFormContinuation(mightNeedFixedFormSpace); } else { return FreeFormContinuation(); } } else { return false; } } std::optional Prescanner::IsFixedFormCompilerDirectiveLine(const char *start) const { const char *p{start}; char col1{*p++}; if (!IsFixedFormCommentChar(col1)) { return std::nullopt; } char sentinel[5], *sp{sentinel}; int column{2}; for (; column < 6; ++column, ++p) { if (*p != ' ') { if (*p == '\n' || *p == '\t') { break; } if (sp == sentinel + 1 && sentinel[0] == '$' && IsDecimalDigit(*p)) { // OpenMP conditional compilation line: leave the label alone break; } *sp++ = ToLowerCaseLetter(*p); } } if (column == 6) { if (*p == ' ' || *p == '\t' || *p == '0') { ++p; } else { // This is a Continuation line, not an initial directive line. return std::nullopt; } } if (sp == sentinel) { return std::nullopt; } *sp = '\0'; if (const char *ss{IsCompilerDirectiveSentinel(sentinel)}) { std::size_t payloadOffset = p - start; return {LineClassification{ LineClassification::Kind::CompilerDirective, payloadOffset, ss}}; } return std::nullopt; } std::optional Prescanner::IsFreeFormCompilerDirectiveLine(const char *start) const { char sentinel[8]; const char *p{SkipWhiteSpace(start)}; if (*p++ != '!') { return std::nullopt; } for (std::size_t j{0}; j + 1 < sizeof sentinel; ++p, ++j) { if (*p == '\n') { break; } if (*p == ' ' || *p == '\t' || *p == '&') { if (j == 0) { break; } sentinel[j] = '\0'; p = SkipWhiteSpace(p + 1); if (*p == '!') { break; } if (const char *sp{IsCompilerDirectiveSentinel(sentinel)}) { std::size_t offset = p - start; return {LineClassification{ LineClassification::Kind::CompilerDirective, offset, sp}}; } break; } sentinel[j] = ToLowerCaseLetter(*p); } return std::nullopt; } Prescanner &Prescanner::AddCompilerDirectiveSentinel(const std::string &dir) { std::uint64_t packed{0}; for (char ch : dir) { packed = (packed << 8) | (ToLowerCaseLetter(ch) & 0xff); } compilerDirectiveBloomFilter_.set(packed % prime1); compilerDirectiveBloomFilter_.set(packed % prime2); compilerDirectiveSentinels_.insert(dir); return *this; } const char *Prescanner::IsCompilerDirectiveSentinel( const char *sentinel) const { std::uint64_t packed{0}; std::size_t n{0}; for (; sentinel[n] != '\0'; ++n) { packed = (packed << 8) | (sentinel[n] & 0xff); } if (n == 0 || !compilerDirectiveBloomFilter_.test(packed % prime1) || !compilerDirectiveBloomFilter_.test(packed % prime2)) { return nullptr; } const auto iter{compilerDirectiveSentinels_.find(std::string(sentinel, n))}; return iter == compilerDirectiveSentinels_.end() ? nullptr : iter->c_str(); } constexpr bool IsDirective(const char *match, const char *dir) { for (; *match; ++match) { if (*match != ToLowerCaseLetter(*dir++)) { return false; } } return true; } Prescanner::LineClassification Prescanner::ClassifyLine( const char *start) const { if (inFixedForm_) { if (std::optional lc{ IsFixedFormCompilerDirectiveLine(start)}) { return std::move(*lc); } if (IsFixedFormCommentLine(start)) { return {LineClassification::Kind::Comment}; } } else { if (std::optional lc{ IsFreeFormCompilerDirectiveLine(start)}) { return std::move(*lc); } if (const char *bang{IsFreeFormComment(start)}) { return {LineClassification::Kind::Comment, static_cast(bang - start)}; } } if (std::optional quoteOffset{IsIncludeLine(start)}) { return {LineClassification::Kind::IncludeLine, *quoteOffset}; } if (const char *dir{IsPreprocessorDirectiveLine(start)}) { if (IsDirective("if", dir) || IsDirective("elif", dir) || IsDirective("else", dir) || IsDirective("endif", dir)) { return {LineClassification::Kind::ConditionalCompilationDirective}; } else if (IsDirective("include", dir)) { return {LineClassification::Kind::IncludeDirective}; } else if (IsDirective("define", dir) || IsDirective("undef", dir)) { return {LineClassification::Kind::DefinitionDirective}; } else { return {LineClassification::Kind::PreprocessorDirective}; } } return {LineClassification::Kind::Source}; } void Prescanner::SourceFormChange(std::string &&dir) { if (dir == "!dir$ free") { inFixedForm_ = false; } else if (dir == "!dir$ fixed") { inFixedForm_ = true; } } } // namespace Fortran::parser