From ab096b1d7f32484a6b5707ff6a81ccab2a30b1c0 Mon Sep 17 00:00:00 2001 From: Matheus Aoki Hunsche Date: Fri, 31 Oct 2025 14:36:03 -0300 Subject: [PATCH 1/2] =?UTF-8?q?Revert=20"Revert=20"Indexer:=20Register=20e?= =?UTF-8?q?very=20encountered=20file=20with=20the=20help=20of=20spe?= =?UTF-8?q?=E2=80=A6"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 358edc7ee258c74e95330b3310500911ab583d49. --- infra/indexer/frontend/common.cc | 22 +++++------- infra/indexer/frontend/common.h | 9 +++++ infra/indexer/frontend/frontend_test.cc | 45 +++++++++++++++++++++++-- infra/indexer/frontend/index_action.cc | 25 +++++++++++++- infra/indexer/frontend/index_action.h | 8 +++++ infra/indexer/index/in_memory_index.h | 3 +- infra/indexer/index/types.h | 7 ++++ infra/indexer/ubuntu-20-04.Dockerfile | 24 ------------- infra/indexer/ubuntu-24-04.Dockerfile | 24 ------------- 9 files changed, 101 insertions(+), 66 deletions(-) delete mode 100644 infra/indexer/ubuntu-20-04.Dockerfile delete mode 100644 infra/indexer/ubuntu-24-04.Dockerfile diff --git a/infra/indexer/frontend/common.cc b/infra/indexer/frontend/common.cc index e549603185f0..947bddf28689 100644 --- a/infra/indexer/frontend/common.cc +++ b/infra/indexer/frontend/common.cc @@ -15,13 +15,12 @@ #include "indexer/frontend/common.h" #include -#include #include // NOLINT -#include #include #include "indexer/index/in_memory_index.h" #include "indexer/index/types.h" +#include "absl/log/check.h" #include "absl/strings/string_view.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" @@ -30,10 +29,15 @@ namespace oss_fuzz { namespace indexer { -std::string CleanPath(absl::string_view path, absl::string_view cwd) { +std::string ToNormalizedAbsolutePath( + absl::string_view path, const clang::SourceManager& source_manager) { std::filesystem::path native_path = std::filesystem::path(path); if (!native_path.is_absolute()) { - native_path = std::filesystem::path(cwd); + llvm::ErrorOr cwd = source_manager.getFileManager() + .getVirtualFileSystem() + .getCurrentWorkingDirectory(); + QCHECK(cwd) << "unable to get cwd"; + native_path = std::filesystem::path(*cwd); native_path.append(path); } return native_path.lexically_normal(); @@ -83,17 +87,9 @@ LocationId GetLocationId(InMemoryIndex& index, end_line = start_line; } - llvm::ErrorOr cwd = source_manager.getFileManager() - .getVirtualFileSystem() - .getCurrentWorkingDirectory(); - if (!cwd) { - std::cerr << "unable to get cwd\n"; - exit(1); - } - if (IsRealPath(path)) { // This is a real file path, so normalize it. - path = CleanPath(path, *cwd); + path = ToNormalizedAbsolutePath(path, source_manager); } return index.GetLocationId({path, start_line, end_line}); } diff --git a/infra/indexer/frontend/common.h b/infra/indexer/frontend/common.h index 94e8898ba6dc..1fe2f1200839 100644 --- a/infra/indexer/frontend/common.h +++ b/infra/indexer/frontend/common.h @@ -15,12 +15,21 @@ #ifndef OSS_FUZZ_INFRA_INDEXER_FRONTEND_COMMON_H_ #define OSS_FUZZ_INFRA_INDEXER_FRONTEND_COMMON_H_ +#include + #include "indexer/index/in_memory_index.h" #include "indexer/index/types.h" +#include "absl/strings/string_view.h" #include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" namespace oss_fuzz { namespace indexer { +// Converts a source-level `path` into a normalized absolute form suitable for +// passing to the indexer as a location path. +std::string ToNormalizedAbsolutePath( + absl::string_view path, const clang::SourceManager& source_manager); + // Converts a pair of `SourceLocation` to a `LocationId` for a location in the // index. LocationId GetLocationId(InMemoryIndex& index, diff --git a/infra/indexer/frontend/frontend_test.cc b/infra/indexer/frontend/frontend_test.cc index 54b863d657c7..cbf47be78841 100644 --- a/infra/indexer/frontend/frontend_test.cc +++ b/infra/indexer/frontend/frontend_test.cc @@ -100,9 +100,11 @@ TEST(ParseCommandLineTest, HashInsideDoubleQuotes) { } // namespace frontend_internal namespace { +typedef void (*TExtraSourceTreeAction)(const std::filesystem::path&); + std::unique_ptr GetSnippetIndex( std::string code, const std::vector& extra_args = {}, - bool fail_on_error = false) { + bool fail_on_error = false, TExtraSourceTreeAction extra_action = nullptr) { auto source_dir = std::filesystem::path(::testing::TempDir()) / "src"; std::filesystem::remove_all(source_dir); CHECK(std::filesystem::create_directory(source_dir)); @@ -114,14 +116,16 @@ std::unique_ptr GetSnippetIndex( std::string source_file_path = (source_dir / "snippet.cc").string(); std::string source_dir_path = source_dir.string(); + if (extra_action != nullptr) { + extra_action(source_dir); + } + auto index_dir = std::filesystem::path(::testing::TempDir()) / "idx"; std::filesystem::remove_all(index_dir); CHECK(std::filesystem::create_directory(index_dir)); std::string index_dir_path = index_dir.string(); std::string sysroot_path = "/"; - FileCopier file_copier(source_dir_path, index_dir_path, {sysroot_path}); - std::unique_ptr merge_queue = MergeQueue::Create(1); auto index_action = std::make_unique(file_copier, *merge_queue); const bool result = clang::tooling::runToolOnCodeWithArgs( @@ -3863,5 +3867,40 @@ TEST(FrontendTest, AliasedSymbol) { EXPECT_HAS_ENTITY(index, Entity::Kind::kFunction, "", "bar", "()", "snippet.cc", 2, 2); } + +TEST(FrontendTest, GhostFileLocations) { + FlatIndex index = + std::move( + *GetSnippetIndex( + /*code=*/"#include \"ghostfile.h\"\n", + /*extra_args=*/{}, + /*fail_on_error=*/true, + /*extra_action=*/ + [](const std::filesystem::path& source_dir) { + std::ofstream ghost_file(source_dir / "ghostfile.h"); + ghost_file + << "// Copyright 2025 Google Inc. All rights reserved."; + CHECK(ghost_file.good()); + })) + .Export(); + + bool found_self = false; + bool found_include = false; + bool found_other = false; + for (const Location& location : index.locations) { + if (location.is_whole_file()) { + if (location.path().ends_with("snippet.cc")) { + found_self = true; + } else if (location.path().ends_with("ghostfile.h")) { + found_include = true; + } + } else if (location.is_real()) { + found_other = true; + } + } + EXPECT_TRUE(found_self); + EXPECT_TRUE(found_include); + EXPECT_FALSE(found_other); +} } // namespace indexer } // namespace oss_fuzz diff --git a/infra/indexer/frontend/index_action.cc b/infra/indexer/frontend/index_action.cc index acabee106530..3c3557d1a9ef 100644 --- a/infra/indexer/frontend/index_action.cc +++ b/infra/indexer/frontend/index_action.cc @@ -20,9 +20,11 @@ #include #include "indexer/frontend/ast_visitor.h" +#include "indexer/frontend/common.h" #include "indexer/frontend/pp_callbacks.h" #include "indexer/index/file_copier.h" #include "indexer/index/in_memory_index.h" +#include "indexer/index/types.h" #include "indexer/merge_queue.h" #include "absl/flags/flag.h" #include "absl/log/check.h" @@ -30,6 +32,7 @@ #include "absl/strings/string_view.h" #include "clang/AST/ASTConsumer.h" #include "clang/Frontend/CompilerInstance.h" +#include "clang/Frontend/Utils.h" #include "clang/Lex/Pragma.h" #include "clang/Lex/Preprocessor.h" #include "llvm/ADT/StringRef.h" @@ -62,7 +65,10 @@ IndexAction::IndexAction(FileCopier& file_copier, MergeQueue& merge_queue) bool IndexAction::BeginSourceFileAction(clang::CompilerInstance& compiler) { CHECK(index_); + dependencies_collector_ = std::make_unique(); + clang::Preprocessor& preprocessor = compiler.getPreprocessor(); + dependencies_collector_->attachToPreprocessor(preprocessor); preprocessor.addPPCallbacks( std::make_unique(*index_, compiler.getSourceManager())); for (const std::string& ignored_pragma : @@ -75,7 +81,24 @@ bool IndexAction::BeginSourceFileAction(clang::CompilerInstance& compiler) { return !absl::EndsWith(compiler.getFrontendOpts().Inputs[0].getFile(), ".S"); } -void IndexAction::EndSourceFileAction() { merge_queue_.Add(std::move(index_)); } +void IndexAction::EndSourceFileAction() { + const clang::SourceManager& source_manager = + getCompilerInstance().getSourceManager(); + for (const std::string& filename : + dependencies_collector_->getDependencies()) { + if (!IsRealPath(filename)) { + continue; + } + const auto absolute_path = + ToNormalizedAbsolutePath(filename, source_manager); + // Create a "whole file" location per filename to make sure files without + // indexed symbols are still copied and e.g. accounted for in deltas. + index_->GetLocationId(Location::WholeFile(absolute_path)); + } + dependencies_collector_.reset(); + + merge_queue_.Add(std::move(index_)); +} std::unique_ptr IndexAction::CreateASTConsumer( clang::CompilerInstance& compiler, llvm::StringRef path) { diff --git a/infra/indexer/frontend/index_action.h b/infra/indexer/frontend/index_action.h index 5b00e5487ecb..7cd516c73673 100644 --- a/infra/indexer/frontend/index_action.h +++ b/infra/indexer/frontend/index_action.h @@ -22,11 +22,18 @@ #include "indexer/merge_queue.h" #include "clang/AST/ASTConsumer.h" #include "clang/Frontend/FrontendAction.h" +#include "clang/Frontend/Utils.h" #include "clang/Tooling/Tooling.h" #include "llvm/ADT/StringRef.h" namespace oss_fuzz { namespace indexer { +class AllDependenciesCollector : public clang::DependencyCollector { + public: + // Also include files from the "system" locations. + bool needSystemDependencies() override { return true; } +}; + // IndexAction provides the entry-point for the indexing tooling. This should // typically not be used directly, and the functions exposed in // indexer/frontend.h should be used instead. @@ -43,6 +50,7 @@ class IndexAction : public clang::ASTFrontendAction { private: std::unique_ptr index_; MergeQueue& merge_queue_; + std::unique_ptr dependencies_collector_; }; class IndexActionFactory : public clang::tooling::FrontendActionFactory { diff --git a/infra/indexer/index/in_memory_index.h b/infra/indexer/index/in_memory_index.h index 46894b98400f..3002f426e82e 100644 --- a/infra/indexer/index/in_memory_index.h +++ b/infra/indexer/index/in_memory_index.h @@ -49,7 +49,8 @@ class InMemoryIndex { // The `GetXxxId` functions return the id of an existing, matching object if // there is already one in the index, or allocate a new id if there is not an // identical object in the index. - // `GetLocationId` expects a location with an absolute path if not built-in. + // `GetLocationId` expects a location with an absolute path if not built-in; + // use `ToNormalizedAbsolutePath` to obtain one. LocationId GetLocationId(Location location); EntityId GetEntityId(const Entity& entity); const Entity& GetEntityById(EntityId entity_id) const; diff --git a/infra/indexer/index/types.h b/infra/indexer/index/types.h index 0af0067ca4ed..f06a4b93a302 100644 --- a/infra/indexer/index/types.h +++ b/infra/indexer/index/types.h @@ -53,11 +53,18 @@ class Location { public: Location(absl::string_view path, uint32_t start_line, uint32_t end_line); + static Location WholeFile(absl::string_view path) { + return Location(path, /*start_line=*/0, /*end_line=*/0); + } + inline const std::string& path() const { return path_; } inline uint32_t start_line() const { return start_line_; } inline uint32_t end_line() const { return end_line_; } inline bool is_real() const { return IsRealPath(path()); } + inline bool is_whole_file() const { + return start_line_ == 0 && end_line_ == 0; + } private: friend class InMemoryIndex; diff --git a/infra/indexer/ubuntu-20-04.Dockerfile b/infra/indexer/ubuntu-20-04.Dockerfile deleted file mode 100644 index 623178d75927..000000000000 --- a/infra/indexer/ubuntu-20-04.Dockerfile +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -################################################################################ - -FROM gcr.io/oss-fuzz-base/base-clang-full:ubuntu-20-04 - -RUN mkdir /indexer -WORKDIR /indexer -COPY . /indexer - -RUN apt-get update && apt-get install -y libsqlite3-dev make zlib1g-dev -RUN mkdir build && cd build && cmake .. && cmake --build . -j -v diff --git a/infra/indexer/ubuntu-24-04.Dockerfile b/infra/indexer/ubuntu-24-04.Dockerfile deleted file mode 100644 index 786e3387b425..000000000000 --- a/infra/indexer/ubuntu-24-04.Dockerfile +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -################################################################################ - -FROM gcr.io/oss-fuzz-base/base-clang-full:ubuntu-24-04 - -RUN mkdir /indexer -WORKDIR /indexer -COPY . /indexer - -RUN apt-get update && apt-get install -y libsqlite3-dev make zlib1g-dev -RUN mkdir build && cd build && cmake .. && cmake --build . -j -v From dc741655509b76887653c1c3daf8f51534cfa43d Mon Sep 17 00:00:00 2001 From: Matheus Hunsche Date: Fri, 31 Oct 2025 17:38:32 +0000 Subject: [PATCH 2/2] Revert indexer Dockerfiles to master --- infra/indexer/ubuntu-20-04.Dockerfile | 24 ++++++++++++++++++++++++ infra/indexer/ubuntu-24-04.Dockerfile | 24 ++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 infra/indexer/ubuntu-20-04.Dockerfile create mode 100644 infra/indexer/ubuntu-24-04.Dockerfile diff --git a/infra/indexer/ubuntu-20-04.Dockerfile b/infra/indexer/ubuntu-20-04.Dockerfile new file mode 100644 index 000000000000..623178d75927 --- /dev/null +++ b/infra/indexer/ubuntu-20-04.Dockerfile @@ -0,0 +1,24 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +################################################################################ + +FROM gcr.io/oss-fuzz-base/base-clang-full:ubuntu-20-04 + +RUN mkdir /indexer +WORKDIR /indexer +COPY . /indexer + +RUN apt-get update && apt-get install -y libsqlite3-dev make zlib1g-dev +RUN mkdir build && cd build && cmake .. && cmake --build . -j -v diff --git a/infra/indexer/ubuntu-24-04.Dockerfile b/infra/indexer/ubuntu-24-04.Dockerfile new file mode 100644 index 000000000000..786e3387b425 --- /dev/null +++ b/infra/indexer/ubuntu-24-04.Dockerfile @@ -0,0 +1,24 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +################################################################################ + +FROM gcr.io/oss-fuzz-base/base-clang-full:ubuntu-24-04 + +RUN mkdir /indexer +WORKDIR /indexer +COPY . /indexer + +RUN apt-get update && apt-get install -y libsqlite3-dev make zlib1g-dev +RUN mkdir build && cd build && cmake .. && cmake --build . -j -v