[index] Improve macro indexing support

The major change here is to index macro occurrences in more places than
before, specifically

* In non-expansion references such as `#if`, `#ifdef`, etc.
* When the macro is a reference to a builtin macro such as __LINE__.
* When using the preprocessor state instead of callbacks, we now include
  all definition locations and undefinitions instead of just the latest
  one (which may also have had the wrong location previously).
* When indexing an existing module file (.pcm), we now include module
  macros, and we no longer report unrelated preprocessor macros while
  indexing the module, which could have caused duplication.

Additionally, we now correctly obey the system symbol filter for macros,
so by default in system headers only definition/undefinition occurrences
are reported, but it can be configured to report references as well if
desired.

Extends FileIndexRecord to support occurrences of macros. Since the
design of this type is to keep a single list of entities organized by
source location, we incorporate macros into the existing DeclOccurrence
struct.

Differential Revision: https://reviews.llvm.org/D99758
This commit is contained in:
Ben Langmuir 2021-03-23 15:22:58 -07:00
parent 9c5ebf0358
commit 93c87fc06e
15 changed files with 290 additions and 60 deletions

View file

@ -9,26 +9,31 @@
#ifndef LLVM_CLANG_INDEX_DECLOCCURRENCE_H
#define LLVM_CLANG_INDEX_DECLOCCURRENCE_H
#include "clang/AST/DeclBase.h"
#include "clang/Basic/LLVM.h"
#include "clang/Index/IndexSymbol.h"
#include "clang/Lex/MacroInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/SmallVector.h"
namespace clang {
class Decl;
namespace index {
struct DeclOccurrence {
SymbolRoleSet Roles;
unsigned Offset;
const Decl *Dcl;
llvm::PointerUnion<const Decl *, const MacroInfo *> DeclOrMacro;
const IdentifierInfo *MacroName = nullptr;
SmallVector<SymbolRelation, 3> Relations;
DeclOccurrence(SymbolRoleSet R, unsigned Offset, const Decl *D,
ArrayRef<SymbolRelation> Relations)
: Roles(R), Offset(Offset), Dcl(D),
: Roles(R), Offset(Offset), DeclOrMacro(D),
Relations(Relations.begin(), Relations.end()) {}
DeclOccurrence(SymbolRoleSet R, unsigned Offset, const IdentifierInfo *Name,
const MacroInfo *MI)
: Roles(R), Offset(Offset), DeclOrMacro(MI), MacroName(Name) {}
friend bool operator<(const DeclOccurrence &LHS, const DeclOccurrence &RHS) {
return LHS.Offset < RHS.Offset;

View file

@ -28,6 +28,7 @@ struct IndexingOptions {
SystemSymbolFilterKind::DeclarationsOnly;
bool IndexFunctionLocals = false;
bool IndexImplicitInstantiation = false;
bool IndexMacros = true;
// Whether to index macro definitions in the Preprocessor when preprocessor
// callbacks are not available (e.g. after parsing has finished). Note that
// macro references are not available in the Preprocessor.

View file

@ -17,42 +17,68 @@
using namespace clang;
using namespace clang::index;
/// Inserts \p Info into \p Decls while keeping the vector ordered by offset.
///
/// \param Decls the occurrence list to update; expected to already be sorted.
/// \param Info the occurrence to add; it is moved into the list.
static void addOccurrence(std::vector<DeclOccurrence> &Decls,
                          DeclOccurrence Info) {
  // Common case: the new occurrence lies strictly past the current last entry
  // (or the list is empty), so it can simply be appended.
  const bool BelongsAtEnd =
      Decls.empty() || Decls.back().Offset < Info.Offset;
  if (!BelongsAtEnd) {
    // Occurrences are consumed in order everywhere, so place this one at its
    // sorted position instead of appending.
    auto InsertPos = llvm::upper_bound(Decls, Info);
    Decls.insert(InsertPos, std::move(Info));
    return;
  }

  Decls.push_back(std::move(Info));
}
void FileIndexRecord::addDeclOccurence(SymbolRoleSet Roles, unsigned Offset,
const Decl *D,
ArrayRef<SymbolRelation> Relations) {
assert(D->isCanonicalDecl() &&
"Occurrences should be associated with their canonical decl");
auto IsNextOccurence = [&]() -> bool {
if (Decls.empty())
return true;
auto &Last = Decls.back();
return Last.Offset < Offset;
};
if (IsNextOccurence()) {
Decls.emplace_back(Roles, Offset, D, Relations);
return;
}
DeclOccurrence NewInfo(Roles, Offset, D, Relations);
// We keep Decls in order as we need to access them in this order in all cases.
auto It = llvm::upper_bound(Decls, NewInfo);
Decls.insert(It, std::move(NewInfo));
addOccurrence(Decls, DeclOccurrence(Roles, Offset, D, Relations));
}
void FileIndexRecord::print(llvm::raw_ostream &OS) const {
/// Records an occurrence of the macro \p Name (described by \p MI) with the
/// given \p Roles at \p Offset, in the same ordered list used for decls.
void FileIndexRecord::addMacroOccurence(SymbolRoleSet Roles, unsigned Offset,
                                        const IdentifierInfo *Name,
                                        const MacroInfo *MI) {
  DeclOccurrence MacroOccurrence(Roles, Offset, Name, MI);
  addOccurrence(Decls, std::move(MacroOccurrence));
}
/// Drops every recorded occurrence whose macro is flagged as being used for a
/// header guard. Decl occurrences are left untouched.
void FileIndexRecord::removeHeaderGuardMacros() {
  // An entry qualifies only if it holds a MacroInfo (not a Decl) and that
  // macro reports itself as a header guard.
  const auto IsHeaderGuardMacro = [](const DeclOccurrence &Occ) {
    const auto *Macro = Occ.DeclOrMacro.dyn_cast<const MacroInfo *>();
    return Macro && Macro->isUsedForHeaderGuard();
  };

  Decls.erase(std::remove_if(Decls.begin(), Decls.end(), IsHeaderGuardMacro),
              Decls.end());
}
void FileIndexRecord::print(llvm::raw_ostream &OS, SourceManager &SM) const {
OS << "DECLS BEGIN ---\n";
for (auto &DclInfo : Decls) {
const Decl *D = DclInfo.Dcl;
SourceManager &SM = D->getASTContext().getSourceManager();
SourceLocation Loc = SM.getFileLoc(D->getLocation());
PresumedLoc PLoc = SM.getPresumedLoc(Loc);
OS << llvm::sys::path::filename(PLoc.getFilename()) << ':' << PLoc.getLine()
<< ':' << PLoc.getColumn();
if (const auto *D = DclInfo.DeclOrMacro.dyn_cast<const Decl *>()) {
SourceLocation Loc = SM.getFileLoc(D->getLocation());
PresumedLoc PLoc = SM.getPresumedLoc(Loc);
OS << llvm::sys::path::filename(PLoc.getFilename()) << ':'
<< PLoc.getLine() << ':' << PLoc.getColumn();
if (auto ND = dyn_cast<NamedDecl>(D)) {
OS << ' ' << ND->getDeclName();
if (const auto *ND = dyn_cast<NamedDecl>(D)) {
OS << ' ' << ND->getDeclName();
}
} else {
const auto *MI = DclInfo.DeclOrMacro.get<const MacroInfo *>();
SourceLocation Loc = SM.getFileLoc(MI->getDefinitionLoc());
PresumedLoc PLoc = SM.getPresumedLoc(Loc);
OS << llvm::sys::path::filename(PLoc.getFilename()) << ':'
<< PLoc.getLine() << ':' << PLoc.getColumn();
OS << ' ' << DclInfo.MacroName->getName();
}
OS << '\n';

View file

@ -48,7 +48,21 @@ public:
/// \param Relations the set of symbols related to this occurrence.
void addDeclOccurence(SymbolRoleSet Roles, unsigned Offset, const Decl *D,
ArrayRef<SymbolRelation> Relations);
void print(llvm::raw_ostream &OS) const;
/// Adds an occurrence of the given macro at the supplied \c Offset.
///
/// \param Roles the roles the occurrence fulfills in this position.
/// \param Offset the offset in the file of this occurrence.
/// \param Name the name of the macro.
/// \param MI the MacroInfo of the macro this is an occurrence of.
void addMacroOccurence(SymbolRoleSet Roles, unsigned Offset,
const IdentifierInfo *Name, const MacroInfo *MI);
/// Remove any macro occurrences for header guards. When preprocessing, this
/// will only be accurate after HandleEndOfFile.
void removeHeaderGuardMacros();
void print(llvm::raw_ostream &OS, SourceManager &SM) const;
};
} // end namespace index

View file

@ -51,6 +51,32 @@ public:
MacroNameTok.getLocation(),
*MD.getMacroInfo());
}
void Defined(const Token &MacroNameTok, const MacroDefinition &MD,
SourceRange Range) override {
if (!MD.getMacroInfo()) // Ignore nonexistent macro.
return;
// Note: this is defined(M), not #define M
IndexCtx->handleMacroReference(*MacroNameTok.getIdentifierInfo(),
MacroNameTok.getLocation(),
*MD.getMacroInfo());
}
/// Reports a macro reference for the name tested by an `#ifdef`, provided the
/// macro actually has a definition.
void Ifdef(SourceLocation Loc, const Token &MacroNameTok,
           const MacroDefinition &MD) override {
  const MacroInfo *MI = MD.getMacroInfo();
  if (MI == nullptr)
    return; // Ignore nonexistent macro.

  IndexCtx->handleMacroReference(*MacroNameTok.getIdentifierInfo(),
                                 MacroNameTok.getLocation(), *MI);
}
/// Reports a macro reference for the name tested by an `#ifndef`, provided the
/// macro actually has a definition.
void Ifndef(SourceLocation Loc, const Token &MacroNameTok,
            const MacroDefinition &MD) override {
  const MacroInfo *MI = MD.getMacroInfo();
  if (MI == nullptr)
    return; // Ignore nonexistent macro.

  IndexCtx->handleMacroReference(*MacroNameTok.getIdentifierInfo(),
                                 MacroNameTok.getLocation(), *MI);
}
};
class IndexASTConsumer final : public ASTConsumer {
@ -162,23 +188,54 @@ static void indexTranslationUnit(ASTUnit &Unit, IndexingContext &IndexCtx) {
Unit.visitLocalTopLevelDecls(&IndexCtx, topLevelDeclVisitor);
}
static void indexPreprocessorMacros(const Preprocessor &PP,
IndexDataConsumer &DataConsumer) {
for (const auto &M : PP.macros())
if (MacroDirective *MD = M.second.getLatest()) {
auto *MI = MD->getMacroInfo();
// When using modules, it may happen that we find #undef of a macro that
// was defined in another module. In such case, MI may be nullptr, since
// we only look for macro definitions in the current TU. In that case,
// there is nothing to index.
if (!MI)
continue;
static void indexPreprocessorMacro(const IdentifierInfo *II,
const MacroInfo *MI,
MacroDirective::Kind DirectiveKind,
SourceLocation Loc,
IndexDataConsumer &DataConsumer) {
// When using modules, it may happen that we find #undef of a macro that
// was defined in another module. In such case, MI may be nullptr, since
// we only look for macro definitions in the current TU. In that case,
// there is nothing to index.
if (!MI)
return;
DataConsumer.handleMacroOccurrence(
M.first, MD->getMacroInfo(),
static_cast<unsigned>(index::SymbolRole::Definition),
MD->getLocation());
// Skip implicit visibility change.
if (DirectiveKind == MacroDirective::MD_Visibility)
return;
auto Role = DirectiveKind == MacroDirective::MD_Define
? SymbolRole::Definition
: SymbolRole::Undefinition;
DataConsumer.handleMacroOccurrence(II, MI, static_cast<unsigned>(Role), Loc);
}
/// Reports every macro directive known to \p PP to \p DataConsumer.
///
/// For each macro, the whole directive chain is walked from the latest
/// directive back through its predecessors, so earlier definitions and
/// undefinitions are reported as well as the most recent one.
static void indexPreprocessorMacros(Preprocessor &PP,
                                    IndexDataConsumer &DataConsumer) {
  for (const auto &Entry : PP.macros()) {
    auto *Directive = Entry.second.getLatest();
    while (Directive) {
      indexPreprocessorMacro(Entry.first, Directive->getMacroInfo(),
                             Directive->getKind(), Directive->getLocation(),
                             DataConsumer);
      Directive = Directive->getPrevious();
    }
  }
}
/// Reports the module macros that belong to \p Mod to \p DataConsumer.
///
/// Only identifiers without a latest local directive are considered. For
/// those, each leaf module macro whose owning module's AST file is
/// \p Mod.File is reported as a definition at its definition location.
static void indexPreprocessorModuleMacros(Preprocessor &PP,
                                          serialization::ModuleFile &Mod,
                                          IndexDataConsumer &DataConsumer) {
  for (const auto &Entry : PP.macros()) {
    // Identifiers that have a local directive are skipped here.
    if (Entry.second.getLatest() != nullptr)
      continue;

    for (auto *ModuleMacro : PP.getLeafModuleMacros(Entry.first)) {
      auto *Owner = ModuleMacro->getOwningModule();
      // Skip macros owned by a module stored in a different AST file.
      if (!Owner || !(Owner->getASTFile() == Mod.File))
        continue;

      auto *MI = ModuleMacro->getMacroInfo();
      if (!MI)
        continue;

      indexPreprocessorMacro(Entry.first, MI, MacroDirective::MD_Define,
                             MI->getDefinitionLoc(), DataConsumer);
    }
  }
}
void index::indexASTUnit(ASTUnit &Unit, IndexDataConsumer &DataConsumer,
@ -225,8 +282,9 @@ void index::indexModuleFile(serialization::ModuleFile &Mod, ASTReader &Reader,
IndexCtx.setASTContext(Ctx);
DataConsumer.initialize(Ctx);
if (Opts.IndexMacrosInPreprocessor)
indexPreprocessorMacros(Reader.getPreprocessor(), DataConsumer);
if (Opts.IndexMacrosInPreprocessor) {
indexPreprocessorModuleMacros(Reader.getPreprocessor(), Mod, DataConsumer);
}
for (const Decl *D : Reader.getModuleFileLevelDecls(Mod)) {
IndexCtx.indexTopLevelDecl(D);

View file

@ -457,6 +457,8 @@ bool IndexingContext::handleDeclOccurrence(const Decl *D, SourceLocation Loc,
void IndexingContext::handleMacroDefined(const IdentifierInfo &Name,
SourceLocation Loc,
const MacroInfo &MI) {
if (!shouldIndexMacroOccurrence(/*IsRef=*/false, Loc))
return;
SymbolRoleSet Roles = (unsigned)SymbolRole::Definition;
DataConsumer.handleMacroOccurrence(&Name, &MI, Roles, Loc);
}
@ -464,6 +466,8 @@ void IndexingContext::handleMacroDefined(const IdentifierInfo &Name,
void IndexingContext::handleMacroUndefined(const IdentifierInfo &Name,
SourceLocation Loc,
const MacroInfo &MI) {
if (!shouldIndexMacroOccurrence(/*IsRef=*/false, Loc))
return;
SymbolRoleSet Roles = (unsigned)SymbolRole::Undefinition;
DataConsumer.handleMacroOccurrence(&Name, &MI, Roles, Loc);
}
@ -471,6 +475,37 @@ void IndexingContext::handleMacroUndefined(const IdentifierInfo &Name,
void IndexingContext::handleMacroReference(const IdentifierInfo &Name,
SourceLocation Loc,
const MacroInfo &MI) {
if (!shouldIndexMacroOccurrence(/*IsRef=*/true, Loc))
return;
SymbolRoleSet Roles = (unsigned)SymbolRole::Reference;
DataConsumer.handleMacroOccurrence(&Name, &MI, Roles, Loc);
}
bool IndexingContext::shouldIndexMacroOccurrence(bool IsRef,
SourceLocation Loc) {
if (!IndexOpts.IndexMacros)
return false;
switch (IndexOpts.SystemSymbolFilter) {
case IndexingOptions::SystemSymbolFilterKind::None:
break;
case IndexingOptions::SystemSymbolFilterKind::DeclarationsOnly:
if (!IsRef)
return true;
break;
case IndexingOptions::SystemSymbolFilterKind::All:
return true;
}
SourceManager &SM = Ctx->getSourceManager();
FileID FID = SM.getFileID(SM.getFileLoc(Loc));
if (FID.isInvalid())
return false;
bool Invalid = false;
const SrcMgr::SLocEntry &SEntry = SM.getSLocEntry(FID, &Invalid);
if (Invalid || !SEntry.isFile())
return false;
return SEntry.getFile().getFileCharacteristic() == SrcMgr::C_User;
}

View file

@ -124,6 +124,8 @@ public:
private:
bool shouldIgnoreIfImplicit(const Decl *D);
bool shouldIndexMacroOccurrence(bool IsRef, SourceLocation Loc);
bool handleDeclOccurrence(const Decl *D, SourceLocation Loc,
bool IsRef, const Decl *Parent,
SymbolRoleSet Roles,

View file

@ -1103,15 +1103,14 @@ bool clang::index::generateUSRForMacro(const MacroDefinitionRecord *MD,
bool clang::index::generateUSRForMacro(StringRef MacroName, SourceLocation Loc,
const SourceManager &SM,
SmallVectorImpl<char> &Buf) {
// Don't generate USRs for things with invalid locations.
if (MacroName.empty() || Loc.isInvalid())
if (MacroName.empty())
return true;
llvm::raw_svector_ostream Out(Buf);
// Assume that system headers are sane. Don't put source location
// information into the USR if the macro comes from a system header.
bool ShouldGenerateLocation = !SM.isInSystemHeader(Loc);
bool ShouldGenerateLocation = Loc.isValid() && !SM.isInSystemHeader(Loc);
Out << getUSRSpacePrefix();
if (ShouldGenerateLocation)

View file

@ -1,2 +1,3 @@
void ModA_func(void);
#define MODA_MACRO 1

View file

@ -1,2 +1,3 @@
void SubModA_func(void);
#define SUBMODA_MACRO 1

View file

@ -34,3 +34,18 @@ class SubCls1 : public Cls {
// CHECK-NOT: [[@LINE+1]]:3 | class/C++ | SubCls1 |
SubCls1 *f;
};
// FIXME: this decl gets reported after the macro definitions, immediately
// before the next declaration. Add a dummy declaration so that the checks work.
void reset_parser();
// CHECK: [[@LINE+1]]:9 | macro/C | SYSTEM_MACRO | c:@macro@SYSTEM_MACRO | Def
#define SYSTEM_MACRO 1
// CHECK: [[@LINE+1]]:8 | macro/C | SYSTEM_MACRO | c:@macro@SYSTEM_MACRO | Undef
#undef SYSTEM_MACRO
// CHECK: [[@LINE+1]]:9 | macro/C | SYSTEM_MACRO | c:@macro@SYSTEM_MACRO | Def
#define SYSTEM_MACRO int fromSystemMacro = 1
// CHECK-NOT: [[@LINE+2]]:1 | macro/C
// CHECK: [[@LINE+1]]:1 | variable/C | fromSystemMacro
SYSTEM_MACRO;

View file

@ -1,12 +1,47 @@
// RUN: c-index-test core -print-source-symbols -- %s | FileCheck %s
// RUN: c-index-test core -print-source-symbols -ignore-macros -- %s | FileCheck %s -check-prefix=DISABLED
// DISABLED-NOT: macro/C
// DISABLED-NOT: XI
// CHECK: [[@LINE+1]]:9 | macro/C | X1 | c:index-macros.c@157@macro@X1 | Def |
// CHECK: [[@LINE+1]]:9 | macro/C | X1 | [[X1_USR:.*@macro@X1]] | Def |
#define X1 1
// CHECK: [[@LINE+1]]:9 | macro/C | DEF | c:index-macros.c@251@macro@DEF | Def |
// CHECK: [[@LINE+1]]:9 | macro/C | DEF | [[DEF_USR:.*@macro@DEF]] | Def |
#define DEF(x) int x
// CHECK: [[@LINE+1]]:8 | macro/C | X1 | c:index-macros.c@157@macro@X1 | Undef |
// CHECK: [[@LINE+1]]:8 | macro/C | X1 | [[X1_USR]] | Undef |
#undef X1
// CHECK: [[@LINE+2]]:1 | macro/C | DEF | c:index-macros.c@251@macro@DEF | Ref |
// CHECK: [[@LINE+1]]:9 | macro/C | C | [[C_USR:.*@macro@C]] | Def |
#define C 1
// CHECK: [[@LINE+1]]:5 | macro/C | C | [[C_USR]] | Ref |
#if C
#endif
// CHECK: [[@LINE+1]]:8 | macro/C | C | [[C_USR]] | Ref |
#ifdef C
#endif
// CHECK: [[@LINE+1]]:9 | macro/C | C | [[C_USR]] | Ref |
#ifndef C
#endif
// CHECK: [[@LINE+1]]:13 | macro/C | C | [[C_USR]] | Ref |
#if defined(C)
#endif
// CHECK: [[@LINE+1]]:14 | macro/C | C | [[C_USR]] | Ref |
#if !defined(C)
#endif
// Nonexistent macros should not be included.
// CHECK-NOT: NOT_DEFINED
#ifdef NOT_DEFINED
#endif
#ifndef NOT_DEFINED
#endif
#if defined(NOT_DEFINED) && NOT_DEFINED
#elif !defined(NOT_DEFINED)
#endif
// CHECK: [[@LINE+1]]:5 | macro/C | __LINE__ | c:@macro@__LINE__ | Ref |
#if __LINE__ == 41
#endif
// CHECK: [[@LINE+2]]:1 | macro/C | DEF | [[DEF_USR]] | Ref |
// CHECK: [[@LINE+1]]:5 | variable/C | i | c:@i | {{.*}} | Def | rel: 0
DEF(i);

View file

@ -18,7 +18,10 @@ void foo() {
}
// CHECK: ==== Module ModA ====
// CHECK: 2:6 | function/C | ModA_func | c:@F@ModA_func | {{.*}} | Decl | rel: 0
// CHECK-DAG: 3:9 | macro/C | MODA_MACRO | c:{{.*}}@macro@MODA_MACRO | Def |
// CHECK-DAG: 3:9 | macro/C | SUBMODA_MACRO | c:{{.*}}@macro@SUBMODA_MACRO | Def |
// CHECK-DAG: 2:6 | function/C | ModA_func | c:@F@ModA_func | {{.*}} | Decl | rel: 0
// CHECK-DAG: 2:6 | function/C | SubModA_func | c:@F@SubModA_func | {{.*}} | Decl | rel: 0
// CHECK: ---- Module Inputs ----
// CHECK: user | {{.*}}ModA.h
// CHECK: user | {{.*}}module.modulemap

View file

@ -63,6 +63,9 @@ DumpModuleImports("dump-imported-module-files",
static cl::opt<bool>
IncludeLocals("include-locals", cl::desc("Print local symbols"));
// Boolean command-line option "-ignore-macros": asks the tool to skip
// indexing macros.
static cl::opt<bool> IgnoreMacros("ignore-macros",
cl::desc("Skip indexing macros"));
static cl::opt<std::string>
ModuleFilePath("module-file",
cl::desc("Path to module file to print symbols from"));
@ -210,7 +213,8 @@ static void dumpModuleFileInputs(serialization::ModuleFile &Mod,
static bool printSourceSymbols(const char *Executable,
ArrayRef<const char *> Args,
bool dumpModuleImports, bool indexLocals) {
bool dumpModuleImports, bool indexLocals,
bool ignoreMacros) {
SmallVector<const char *, 4> ArgsWithProgName;
ArgsWithProgName.push_back(Executable);
ArgsWithProgName.append(Args.begin(), Args.end());
@ -224,6 +228,8 @@ static bool printSourceSymbols(const char *Executable,
auto DataConsumer = std::make_shared<PrintIndexDataConsumer>(OS);
IndexingOptions IndexOpts;
IndexOpts.IndexFunctionLocals = indexLocals;
IndexOpts.IndexMacros = !ignoreMacros;
IndexOpts.IndexMacrosInPreprocessor = !ignoreMacros;
std::unique_ptr<FrontendAction> IndexAction =
createIndexingAction(DataConsumer, IndexOpts);
@ -357,7 +363,7 @@ int indextest_core_main(int argc, const char **argv) {
}
return printSourceSymbols(Executable.c_str(), CompArgs,
options::DumpModuleImports,
options::IncludeLocals);
options::IncludeLocals, options::IgnoreMacros);
}
return 0;

View file

@ -161,12 +161,41 @@ TEST(IndexTest, Simple) {
}
TEST(IndexTest, IndexPreprocessorMacros) {
std::string Code = "#define INDEX_MAC 1";
std::string Code = R"cpp(
#define INDEX_MAC 1
#define INDEX_MAC_UNDEF 1
#undef INDEX_MAC_UNDEF
#define INDEX_MAC_REDEF 1
#undef INDEX_MAC_REDEF
#define INDEX_MAC_REDEF 2
)cpp";
auto Index = std::make_shared<Indexer>();
IndexingOptions Opts;
Opts.IndexMacrosInPreprocessor = true;
tooling::runToolOnCode(std::make_unique<IndexAction>(Index, Opts), Code);
EXPECT_THAT(Index->Symbols, Contains(QName("INDEX_MAC")));
EXPECT_THAT(Index->Symbols,
Contains(AllOf(QName("INDEX_MAC"), WrittenAt(Position(2, 13)),
DeclAt(Position(2, 13)),
HasRole(SymbolRole::Definition))));
EXPECT_THAT(
Index->Symbols,
AllOf(Contains(AllOf(QName("INDEX_MAC_UNDEF"), WrittenAt(Position(3, 13)),
DeclAt(Position(3, 13)),
HasRole(SymbolRole::Definition))),
Contains(AllOf(QName("INDEX_MAC_UNDEF"), WrittenAt(Position(4, 12)),
DeclAt(Position(3, 13)),
HasRole(SymbolRole::Undefinition)))));
EXPECT_THAT(
Index->Symbols,
AllOf(Contains(AllOf(QName("INDEX_MAC_REDEF"), WrittenAt(Position(5, 13)),
DeclAt(Position(5, 13)),
HasRole(SymbolRole::Definition))),
Contains(AllOf(QName("INDEX_MAC_REDEF"), WrittenAt(Position(6, 12)),
DeclAt(Position(5, 13)),
HasRole(SymbolRole::Undefinition))),
Contains(AllOf(QName("INDEX_MAC_REDEF"), WrittenAt(Position(7, 13)),
DeclAt(Position(7, 13)),
HasRole(SymbolRole::Definition)))));
Opts.IndexMacrosInPreprocessor = false;
Index->Symbols.clear();