//===-- Core/IncludeDirectives.cpp - Include directives handling ----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file defines the IncludeDirectives class that helps with
/// detecting and modifying \#include directives.
///
//===----------------------------------------------------------------------===//

#include "IncludeDirectives.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/Preprocessor.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include <stack>

using namespace clang;
using namespace clang::tooling;
using llvm::StringRef;

/// \brief PPCallbacks that fills-in the include information in the given
/// \c IncludeDirectives.
class IncludeDirectivesPPCallback : public clang::PPCallbacks {
  // Struct helping the detection of header guards in the various callbacks
  struct GuardDetection {
    GuardDetection(FileID FID)
      : FID(FID), Count(0), TheMacro(nullptr), CountAtEndif(0) {}

    FileID FID;
    // count for relevant preprocessor directives
    unsigned Count;
    // the macro that is tested in the top most ifndef for the header guard
    // (e.g: GUARD_H)
    const IdentifierInfo *TheMacro;
    // the hash locations of #ifndef, #define, #endif
    SourceLocation IfndefLoc, DefineLoc, EndifLoc;
    // the value of Count once the #endif is reached
    unsigned CountAtEndif;

    /// \brief Check that with all the information gathered if this is a
    /// potential header guard.
    ///
    /// Meaning a top-most \#ifndef has been found, followed by a define and the
    /// last preprocessor directive was the terminating \#endif.
    ///
    /// FIXME: accept the \#if !defined identifier form too.
    bool isPotentialHeaderGuard() const {
      return Count == CountAtEndif && DefineLoc.isValid();
    }
  };

public:
  IncludeDirectivesPPCallback(IncludeDirectives *Self)
      : Self(Self), Guard(nullptr) {}
  virtual ~IncludeDirectivesPPCallback() {}

private:
  void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
                          StringRef FileName, bool IsAngled,
                          CharSourceRange FilenameRange, const FileEntry *File,
                          StringRef SearchPath, StringRef RelativePath,
                          const Module *Imported) override {
    SourceManager &SM = Self->Sources;
    const FileEntry *FE = SM.getFileEntryForID(SM.getFileID(HashLoc));
    assert(FE && "Valid file expected.");

    IncludeDirectives::Entry E(HashLoc, File, IsAngled);
    Self->FileToEntries[FE].push_back(E);
    Self->IncludeAsWrittenToLocationsMap[FileName].push_back(HashLoc);
  }

  // Keep track of the current file in the stack
  virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
                           SrcMgr::CharacteristicKind FileType,
                           FileID PrevFID) override {
    SourceManager &SM = Self->Sources;
    switch (Reason) {
    case EnterFile:
      Files.push(GuardDetection(SM.getFileID(Loc)));
      Guard = &Files.top();
      break;

    case ExitFile:
      if (Guard->isPotentialHeaderGuard())
        handlePotentialHeaderGuard(*Guard);
      Files.pop();
      Guard = &Files.top();
      break;

    default:
      break;
    }
  }

  /// \brief Mark this header as guarded in the IncludeDirectives if it's a
  /// proper header guard.
  void handlePotentialHeaderGuard(const GuardDetection &Guard) {
    SourceManager &SM = Self->Sources;
    const FileEntry *File = SM.getFileEntryForID(Guard.FID);
    const LangOptions &LangOpts = Self->CI.getLangOpts();

    // Null file can happen for the <built-in> buffer for example. They
    // shouldn't have header guards though...
    if (!File)
      return;

    // The #ifndef should be the next thing after the preamble. We aren't
    // checking for equality because it can also be part of the preamble if the
    // preamble is the whole file.
    unsigned Preamble =
        Lexer::ComputePreamble(SM.getBuffer(Guard.FID)->getBuffer(), LangOpts)
            .first;
    unsigned IfndefOffset = SM.getFileOffset(Guard.IfndefLoc);
    if (IfndefOffset > (Preamble + 1))
      return;

    // No code is allowed in the code remaining after the #endif.
    const llvm::MemoryBuffer *Buffer = SM.getBuffer(Guard.FID);
    Lexer Lex(SM.getLocForStartOfFile(Guard.FID), LangOpts,
              Buffer->getBufferStart(),
              Buffer->getBufferStart() + SM.getFileOffset(Guard.EndifLoc),
              Buffer->getBufferEnd());

    // Find the first newline not part of a multi-line comment.
    Token Tok;
    Lex.LexFromRawLexer(Tok); // skip endif
    Lex.LexFromRawLexer(Tok);

    // Not a proper header guard, the remainder of the file contains something
    // else than comments or whitespaces.
    if (Tok.isNot(tok::eof))
      return;

    // Add to the location of the define to the IncludeDirectives for this file.
    Self->HeaderToGuard[File] = Guard.DefineLoc;
  }

  virtual void Ifndef(SourceLocation Loc, const Token &MacroNameTok,
                      const MacroDirective *MD) override {
    Guard->Count++;

    // If this #ifndef is the top-most directive and the symbol isn't defined
    // store those information in the guard detection, the next step will be to
    // check for the define.
    if (Guard->Count == 1 && MD == nullptr) {
      IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();

      if (MII->hasMacroDefinition())
        return;
      Guard->IfndefLoc = Loc;
      Guard->TheMacro = MII;
    }
  }

  virtual void MacroDefined(const Token &MacroNameTok,
                            const MacroDirective *MD) override {
    Guard->Count++;

    // If this #define is the second directive of the file and the symbol
    // defined is the same as the one checked in the #ifndef then store the
    // information about this define.
    if (Guard->Count == 2 && Guard->TheMacro != nullptr) {
      IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();

      // macro unrelated to the ifndef, doesn't look like a proper header guard
      if (MII->getName() != Guard->TheMacro->getName())
        return;

      Guard->DefineLoc = MacroNameTok.getLocation();
    }
  }

  void Endif(SourceLocation Loc, SourceLocation IfLoc) override {
    Guard->Count++;

    // If it's the #endif corresponding to the top-most #ifndef
    if (Self->Sources.getDecomposedLoc(Guard->IfndefLoc) !=
        Self->Sources.getDecomposedLoc(IfLoc))
      return;

    // And that the top-most #ifndef was followed by the right #define
    if (Guard->DefineLoc.isInvalid())
      return;

    // Then save the information about this #endif. Once the file is exited we
    // will check if it was the final preprocessor directive.
    Guard->CountAtEndif = Guard->Count;
    Guard->EndifLoc = Loc;
  }

  virtual void MacroExpands(const Token &, const MacroDirective *, SourceRange,
                            const MacroArgs *) override {
    Guard->Count++;
  }
  virtual void MacroUndefined(const Token &,
                              const MacroDirective *) override {
    Guard->Count++;
  }
  virtual void Defined(const Token &, const MacroDirective *,
                       SourceRange) override {
    Guard->Count++;
  }
  virtual void If(SourceLocation, SourceRange,
                  ConditionValueKind) override {
    Guard->Count++;
  }
  virtual void Elif(SourceLocation, SourceRange, ConditionValueKind,
                    SourceLocation) override {
    Guard->Count++;
  }
  virtual void Ifdef(SourceLocation, const Token &,
                     const MacroDirective *) override {
    Guard->Count++;
  }
  void Else(SourceLocation, SourceLocation) override {
    Guard->Count++;
  }

  IncludeDirectives *Self;
  // keep track of the guard info through the include stack
  std::stack<GuardDetection> Files;
  // convenience field pointing to Files.top().second
  GuardDetection *Guard;
};

// Flags that describes where to insert newlines.
enum NewLineFlags {
  // Prepend a newline at the beginning of the insertion.
  NL_Prepend = 0x1,

  // Prepend another newline at the end of the insertion.
  NL_PrependAnother = 0x2,

  // Add two newlines at the end of the insertion.
  NL_AppendTwice = 0x4,

  // Convenience value to enable both \c NL_Prepend and \c NL_PrependAnother.
  NL_PrependTwice = NL_Prepend | NL_PrependAnother
};

/// \brief Guess the end-of-line sequence used in the given FileID. If the
/// sequence can't be guessed return an Unix-style newline.
static StringRef guessEOL(SourceManager &SM, FileID ID) {
  StringRef Content = SM.getBufferData(ID);
  StringRef Buffer = Content.substr(Content.find_first_of("\r\n"));

  return llvm::StringSwitch<StringRef>(Buffer)
      .StartsWith("\r\n", "\r\n")
      .StartsWith("\n\r", "\n\r")
      .StartsWith("\r", "\r")
      .Default("\n");
}

/// \brief Find the end of the end of the directive, either the beginning of a
/// newline or the end of file.
//
// \return The offset into the file where the directive ends along with a
// boolean value indicating whether the directive ends because the end of file
// was reached or not.
static std::pair<unsigned, bool> findDirectiveEnd(SourceLocation HashLoc,
                                                  SourceManager &SM,
                                                  const LangOptions &LangOpts) {
  FileID FID = SM.getFileID(HashLoc);
  unsigned Offset = SM.getFileOffset(HashLoc);
  StringRef Content = SM.getBufferData(FID);
  Lexer Lex(SM.getLocForStartOfFile(FID), LangOpts, Content.begin(),
            Content.begin() + Offset, Content.end());
  Lex.SetCommentRetentionState(true);
  Token Tok;

  // This loop look for the newline after our directive but avoids the ones part
  // of a multi-line comments:
  //
  //     #include <foo> /* long \n comment */\n
  //                            ~~ no        ~~ yes
  for (;;) {
    // find the beginning of the end-of-line sequence
    StringRef::size_type EOLOffset = Content.find_first_of("\r\n", Offset);
    // ends because EOF was reached
    if (EOLOffset == StringRef::npos)
      return std::make_pair(Content.size(), true);

    // find the token that contains our end-of-line
    unsigned TokEnd = 0;
    do {
      Lex.LexFromRawLexer(Tok);
      TokEnd = SM.getFileOffset(Tok.getLocation()) + Tok.getLength();

      // happens when the whitespaces are eaten after a multiline comment
      if (Tok.is(tok::eof))
        return std::make_pair(EOLOffset, false);
    } while (TokEnd < EOLOffset);

    // the end-of-line is not part of a multi-line comment, return its location
    if (Tok.isNot(tok::comment))
      return std::make_pair(EOLOffset, false);

    // for the next search to start after the end of this token
    Offset = TokEnd;
  }
}

IncludeDirectives::IncludeDirectives(clang::CompilerInstance &CI)
    : CI(CI), Sources(CI.getSourceManager()) {
  // addPPCallbacks takes ownership of the callback
  CI.getPreprocessor().addPPCallbacks(
                          llvm::make_unique<IncludeDirectivesPPCallback>(this));
}

bool IncludeDirectives::lookForInclude(const FileEntry *File,
                                       const LocationVec &IncludeLocs,
                                       SeenFilesSet &Seen) const {
  // mark this file as visited
  Seen.insert(File);

  // First check if included directly in this file
  for (LocationVec::const_iterator I = IncludeLocs.begin(),
                                   E = IncludeLocs.end();
       I != E; ++I)
    if (Sources.getFileEntryForID(Sources.getFileID(*I)) == File)
      return true;

  // Otherwise look recursively all the included files
  FileToEntriesMap::const_iterator EntriesIt = FileToEntries.find(File);
  if (EntriesIt == FileToEntries.end())
    return false;
  for (EntryVec::const_iterator I = EntriesIt->second.begin(),
                                E = EntriesIt->second.end();
       I != E; ++I) {
    // skip if this header has already been checked before
    if (Seen.count(I->getIncludedFile()))
      continue;
    if (lookForInclude(I->getIncludedFile(), IncludeLocs, Seen))
      return true;
  }
  return false;
}

bool IncludeDirectives::hasInclude(const FileEntry *File,
                                   StringRef Include) const {
  llvm::StringMap<LocationVec>::const_iterator It =
      IncludeAsWrittenToLocationsMap.find(Include);

  // Include isn't included in any file
  if (It == IncludeAsWrittenToLocationsMap.end())
    return false;

  SeenFilesSet Seen;
  return lookForInclude(File, It->getValue(), Seen);
}

Replacement IncludeDirectives::addAngledInclude(const clang::FileEntry *File,
                                                llvm::StringRef Include) {
  FileID FID = Sources.translateFile(File);
  assert(!FID.isInvalid() && "Invalid file entry given!");

  if (hasInclude(File, Include))
    return Replacement();

  unsigned Offset, NLFlags;
  std::tie(Offset, NLFlags) = angledIncludeInsertionOffset(FID);

  StringRef EOL = guessEOL(Sources, FID);
  llvm::SmallString<32> InsertionText;
  if (NLFlags & NL_Prepend)
    InsertionText += EOL;
  if (NLFlags & NL_PrependAnother)
    InsertionText += EOL;
  InsertionText += "#include <";
  InsertionText += Include;
  InsertionText += ">";
  if (NLFlags & NL_AppendTwice) {
    InsertionText += EOL;
    InsertionText += EOL;
  }
  return Replacement(File->getName(), Offset, 0, InsertionText);
}

Replacement IncludeDirectives::addAngledInclude(llvm::StringRef File,
                                                llvm::StringRef Include) {
  const FileEntry *Entry = Sources.getFileManager().getFile(File);
  assert(Entry && "Invalid file given!");
  return addAngledInclude(Entry, Include);
}

std::pair<unsigned, unsigned>
IncludeDirectives::findFileHeaderEndOffset(FileID FID) const {
  unsigned NLFlags = NL_Prepend;
  StringRef Content = Sources.getBufferData(FID);
  Lexer Lex(Sources.getLocForStartOfFile(FID), CI.getLangOpts(),
            Content.begin(), Content.begin(), Content.end());
  Lex.SetCommentRetentionState(true);
  Lex.SetKeepWhitespaceMode(true);

  // find the first newline not part of a multi-line comment
  Token Tok;
  do {
    Lex.LexFromRawLexer(Tok);
    unsigned Offset = Sources.getFileOffset(Tok.getLocation());
    // allow one newline between the comments
    if (Tok.is(tok::unknown) && isWhitespace(Content[Offset])) {
      StringRef Whitespaces(Content.substr(Offset, Tok.getLength()));
      if (Whitespaces.count('\n') == 1 || Whitespaces.count('\r') == 1)
        Lex.LexFromRawLexer(Tok);
      else {
        // add an empty line to separate the file header and the inclusion
        NLFlags = NL_PrependTwice;
      }
    }
  } while (Tok.is(tok::comment));

  // apparently there is no header, insertion point is the beginning of the file
  if (Tok.isNot(tok::unknown))
    return std::make_pair(0, NL_AppendTwice);
  return std::make_pair(Sources.getFileOffset(Tok.getLocation()), NLFlags);
}

SourceLocation
IncludeDirectives::angledIncludeHintLoc(FileID FID) const {
  FileToEntriesMap::const_iterator EntriesIt =
      FileToEntries.find(Sources.getFileEntryForID(FID));

  if (EntriesIt == FileToEntries.end())
    return SourceLocation();

  HeaderSearch &HeaderInfo = CI.getPreprocessor().getHeaderSearchInfo();
  const EntryVec &Entries = EntriesIt->second;
  EntryVec::const_reverse_iterator QuotedCandidate = Entries.rend();
  for (EntryVec::const_reverse_iterator I = Entries.rbegin(),
                                        E = Entries.rend();
       I != E; ++I) {
    // Headers meant for multiple inclusion can potentially appears in the
    // middle of the code thus making them a poor choice for an insertion point.
    if (!HeaderInfo.isFileMultipleIncludeGuarded(I->getIncludedFile()))
      continue;

    // return preferably the last angled include
    if (I->isAngled())
      return I->getHashLocation();

    // keep track of the last quoted include that is guarded
    if (QuotedCandidate == Entries.rend())
      QuotedCandidate = I;
  }

  if (QuotedCandidate == Entries.rend())
    return SourceLocation();

  // return the last quoted-include if we couldn't find an angled one
  return QuotedCandidate->getHashLocation();
}

std::pair<unsigned, unsigned>
IncludeDirectives::angledIncludeInsertionOffset(FileID FID) const {
  SourceLocation Hint = angledIncludeHintLoc(FID);
  unsigned NL_Flags = NL_Prepend;

  // If we can't find a similar include and we are in a header check if it's a
  // guarded header. If so the hint will be the location of the #define from the
  // guard.
  if (Hint.isInvalid()) {
    const FileEntry *File = Sources.getFileEntryForID(FID);
    HeaderToGuardMap::const_iterator GuardIt = HeaderToGuard.find(File);
    if (GuardIt != HeaderToGuard.end()) {
      // get the hash location from the #define
      Hint = GuardIt->second;
      // we want a blank line between the #define and the #include
      NL_Flags = NL_PrependTwice;
    }
  }

  // no hints, insertion is done after the file header
  if (Hint.isInvalid())
    return findFileHeaderEndOffset(FID);

  unsigned Offset = findDirectiveEnd(Hint, Sources, CI.getLangOpts()).first;
  return std::make_pair(Offset, NL_Flags);
}
