diff --git a/infra/indexer/frontend/common.cc b/infra/indexer/frontend/common.cc index e549603185f0..947bddf28689 100644 --- a/infra/indexer/frontend/common.cc +++ b/infra/indexer/frontend/common.cc @@ -15,13 +15,12 @@ #include "indexer/frontend/common.h" #include -#include #include // NOLINT -#include #include #include "indexer/index/in_memory_index.h" #include "indexer/index/types.h" +#include "absl/log/check.h" #include "absl/strings/string_view.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" @@ -30,10 +29,15 @@ namespace oss_fuzz { namespace indexer { -std::string CleanPath(absl::string_view path, absl::string_view cwd) { +std::string ToNormalizedAbsolutePath( + absl::string_view path, const clang::SourceManager& source_manager) { std::filesystem::path native_path = std::filesystem::path(path); if (!native_path.is_absolute()) { - native_path = std::filesystem::path(cwd); + llvm::ErrorOr cwd = source_manager.getFileManager() + .getVirtualFileSystem() + .getCurrentWorkingDirectory(); + QCHECK(cwd) << "unable to get cwd"; + native_path = std::filesystem::path(*cwd); native_path.append(path); } return native_path.lexically_normal(); @@ -83,17 +87,9 @@ LocationId GetLocationId(InMemoryIndex& index, end_line = start_line; } - llvm::ErrorOr cwd = source_manager.getFileManager() - .getVirtualFileSystem() - .getCurrentWorkingDirectory(); - if (!cwd) { - std::cerr << "unable to get cwd\n"; - exit(1); - } - if (IsRealPath(path)) { // This is a real file path, so normalize it. - path = CleanPath(path, *cwd); + path = ToNormalizedAbsolutePath(path, source_manager); } return index.GetLocationId({path, start_line, end_line}); } diff --git a/infra/indexer/frontend/common.h b/infra/indexer/frontend/common.h index 94e8898ba6dc..1fe2f1200839 100644 --- a/infra/indexer/frontend/common.h +++ b/infra/indexer/frontend/common.h @@ -15,12 +15,21 @@ #ifndef OSS_FUZZ_INFRA_INDEXER_FRONTEND_COMMON_H_ #define OSS_FUZZ_INFRA_INDEXER_FRONTEND_COMMON_H_ +#include + #include "indexer/index/in_memory_index.h" #include "indexer/index/types.h" +#include "absl/strings/string_view.h" #include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" namespace oss_fuzz { namespace indexer { +// Converts a source-level `path` into a normalized absolute form suitable for +// passing to the indexer as a location path. +std::string ToNormalizedAbsolutePath( + absl::string_view path, const clang::SourceManager& source_manager); + // Converts a pair of `SourceLocation` to a `LocationId` for a location in the // index. LocationId GetLocationId(InMemoryIndex& index, diff --git a/infra/indexer/frontend/frontend_test.cc b/infra/indexer/frontend/frontend_test.cc index 54b863d657c7..cbf47be78841 100644 --- a/infra/indexer/frontend/frontend_test.cc +++ b/infra/indexer/frontend/frontend_test.cc @@ -100,9 +100,11 @@ TEST(ParseCommandLineTest, HashInsideDoubleQuotes) { } // namespace frontend_internal namespace { +typedef void (*TExtraSourceTreeAction)(const std::filesystem::path&); + std::unique_ptr GetSnippetIndex( std::string code, const std::vector& extra_args = {}, - bool fail_on_error = false) { + bool fail_on_error = false, TExtraSourceTreeAction extra_action = nullptr) { auto source_dir = std::filesystem::path(::testing::TempDir()) / "src"; std::filesystem::remove_all(source_dir); CHECK(std::filesystem::create_directory(source_dir)); @@ -114,14 +116,16 @@ std::unique_ptr GetSnippetIndex( std::string source_file_path = (source_dir / "snippet.cc").string(); std::string source_dir_path = source_dir.string(); + if (extra_action != nullptr) { + extra_action(source_dir); + } + auto index_dir = std::filesystem::path(::testing::TempDir()) / "idx"; std::filesystem::remove_all(index_dir); CHECK(std::filesystem::create_directory(index_dir)); std::string index_dir_path = index_dir.string(); std::string sysroot_path = "/"; - FileCopier file_copier(source_dir_path, index_dir_path, {sysroot_path}); - std::unique_ptr merge_queue = MergeQueue::Create(1); auto index_action = std::make_unique(file_copier, *merge_queue); const bool result = clang::tooling::runToolOnCodeWithArgs( @@ -3863,5 +3867,40 @@ TEST(FrontendTest, AliasedSymbol) { EXPECT_HAS_ENTITY(index, Entity::Kind::kFunction, "", "bar", "()", "snippet.cc", 2, 2); } + +TEST(FrontendTest, GhostFileLocations) { + FlatIndex index = + std::move( + *GetSnippetIndex( + /*code=*/"#include \"ghostfile.h\"\n", + /*extra_args=*/{}, + /*fail_on_error=*/true, + /*extra_action=*/ + [](const std::filesystem::path& source_dir) { + std::ofstream ghost_file(source_dir / "ghostfile.h"); + ghost_file + << "// Copyright 2025 Google Inc. All rights reserved."; + CHECK(ghost_file.good()); + })) + .Export(); + + bool found_self = false; + bool found_include = false; + bool found_other = false; + for (const Location& location : index.locations) { + if (location.is_whole_file()) { + if (location.path().ends_with("snippet.cc")) { + found_self = true; + } else if (location.path().ends_with("ghostfile.h")) { + found_include = true; + } + } else if (location.is_real()) { + found_other = true; + } + } + EXPECT_TRUE(found_self); + EXPECT_TRUE(found_include); + EXPECT_FALSE(found_other); +} } // namespace indexer } // namespace oss_fuzz diff --git a/infra/indexer/frontend/index_action.cc b/infra/indexer/frontend/index_action.cc index acabee106530..3c3557d1a9ef 100644 --- a/infra/indexer/frontend/index_action.cc +++ b/infra/indexer/frontend/index_action.cc @@ -20,9 +20,11 @@ #include #include "indexer/frontend/ast_visitor.h" +#include "indexer/frontend/common.h" #include "indexer/frontend/pp_callbacks.h" #include "indexer/index/file_copier.h" #include "indexer/index/in_memory_index.h" +#include "indexer/index/types.h" #include "indexer/merge_queue.h" #include "absl/flags/flag.h" #include "absl/log/check.h" @@ -30,6 +32,7 @@ #include "absl/strings/string_view.h" #include "clang/AST/ASTConsumer.h" #include "clang/Frontend/CompilerInstance.h" +#include "clang/Frontend/Utils.h" #include "clang/Lex/Pragma.h" #include "clang/Lex/Preprocessor.h" #include "llvm/ADT/StringRef.h" @@ -62,7 +65,10 @@ IndexAction::IndexAction(FileCopier& file_copier, MergeQueue& merge_queue) bool IndexAction::BeginSourceFileAction(clang::CompilerInstance& compiler) { CHECK(index_); + dependencies_collector_ = std::make_unique(); + clang::Preprocessor& preprocessor = compiler.getPreprocessor(); + dependencies_collector_->attachToPreprocessor(preprocessor); preprocessor.addPPCallbacks( std::make_unique(*index_, compiler.getSourceManager())); for (const std::string& ignored_pragma : @@ -75,7 +81,24 @@ bool IndexAction::BeginSourceFileAction(clang::CompilerInstance& compiler) { return !absl::EndsWith(compiler.getFrontendOpts().Inputs[0].getFile(), ".S"); } -void IndexAction::EndSourceFileAction() { merge_queue_.Add(std::move(index_)); } +void IndexAction::EndSourceFileAction() { + const clang::SourceManager& source_manager = + getCompilerInstance().getSourceManager(); + for (const std::string& filename : + dependencies_collector_->getDependencies()) { + if (!IsRealPath(filename)) { + continue; + } + const auto absolute_path = + ToNormalizedAbsolutePath(filename, source_manager); + // Create a "whole file" location per filename to make sure files without + // indexed symbols are still copied and e.g. accounted for in deltas. + index_->GetLocationId(Location::WholeFile(absolute_path)); + } + dependencies_collector_.reset(); + + merge_queue_.Add(std::move(index_)); +} std::unique_ptr IndexAction::CreateASTConsumer( clang::CompilerInstance& compiler, llvm::StringRef path) { diff --git a/infra/indexer/frontend/index_action.h b/infra/indexer/frontend/index_action.h index 5b00e5487ecb..7cd516c73673 100644 --- a/infra/indexer/frontend/index_action.h +++ b/infra/indexer/frontend/index_action.h @@ -22,11 +22,18 @@ #include "indexer/merge_queue.h" #include "clang/AST/ASTConsumer.h" #include "clang/Frontend/FrontendAction.h" +#include "clang/Frontend/Utils.h" #include "clang/Tooling/Tooling.h" #include "llvm/ADT/StringRef.h" namespace oss_fuzz { namespace indexer { +class AllDependenciesCollector : public clang::DependencyCollector { + public: + // Also include files from the "system" locations. + bool needSystemDependencies() override { return true; } +}; + // IndexAction provides the entry-point for the indexing tooling. This should // typically not be used directly, and the functions exposed in // indexer/frontend.h should be used instead. @@ -43,6 +50,7 @@ class IndexAction : public clang::ASTFrontendAction { private: std::unique_ptr index_; MergeQueue& merge_queue_; + std::unique_ptr dependencies_collector_; }; class IndexActionFactory : public clang::tooling::FrontendActionFactory { diff --git a/infra/indexer/index/in_memory_index.h b/infra/indexer/index/in_memory_index.h index 46894b98400f..3002f426e82e 100644 --- a/infra/indexer/index/in_memory_index.h +++ b/infra/indexer/index/in_memory_index.h @@ -49,7 +49,8 @@ class InMemoryIndex { // The `GetXxxId` functions return the id of an existing, matching object if // there is already one in the index, or allocate a new id if there is not an // identical object in the index. - // `GetLocationId` expects a location with an absolute path if not built-in. + // `GetLocationId` expects a location with an absolute path if not built-in; + // use `ToNormalizedAbsolutePath` to obtain one. LocationId GetLocationId(Location location); EntityId GetEntityId(const Entity& entity); const Entity& GetEntityById(EntityId entity_id) const; diff --git a/infra/indexer/index/types.h b/infra/indexer/index/types.h index 0af0067ca4ed..f06a4b93a302 100644 --- a/infra/indexer/index/types.h +++ b/infra/indexer/index/types.h @@ -53,11 +53,18 @@ class Location { public: Location(absl::string_view path, uint32_t start_line, uint32_t end_line); + static Location WholeFile(absl::string_view path) { + return Location(path, /*start_line=*/0, /*end_line=*/0); + } + inline const std::string& path() const { return path_; } inline uint32_t start_line() const { return start_line_; } inline uint32_t end_line() const { return end_line_; } inline bool is_real() const { return IsRealPath(path()); } + inline bool is_whole_file() const { + return start_line_ == 0 && end_line_ == 0; + } private: friend class InMemoryIndex;