Skip to content

Commit 21599c9

Browse files
OSS-Fuzz Team authored and copybara-github committed
Indexer: Register every encountered file with the help of special "locations"
Indexer-PiperOrigin-RevId: 824798756
1 parent 96cf5fd commit 21599c9

File tree

7 files changed

+101
-18
lines changed

7 files changed

+101
-18
lines changed

infra/indexer/frontend/common.cc

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,12 @@
1515
#include "indexer/frontend/common.h"
1616

1717
#include <cstdint>
18-
#include <cstdlib>
1918
#include <filesystem> // NOLINT
20-
#include <iostream>
2119
#include <string>
2220

2321
#include "indexer/index/in_memory_index.h"
2422
#include "indexer/index/types.h"
23+
#include "absl/log/check.h"
2524
#include "absl/strings/string_view.h"
2625
#include "clang/Basic/SourceLocation.h"
2726
#include "clang/Basic/SourceManager.h"
@@ -30,10 +29,15 @@
3029
namespace oss_fuzz {
3130
namespace indexer {
3231

33-
std::string CleanPath(absl::string_view path, absl::string_view cwd) {
32+
std::string ToNormalizedAbsolutePath(
33+
absl::string_view path, const clang::SourceManager& source_manager) {
3434
std::filesystem::path native_path = std::filesystem::path(path);
3535
if (!native_path.is_absolute()) {
36-
native_path = std::filesystem::path(cwd);
36+
llvm::ErrorOr<std::string> cwd = source_manager.getFileManager()
37+
.getVirtualFileSystem()
38+
.getCurrentWorkingDirectory();
39+
QCHECK(cwd) << "unable to get cwd";
40+
native_path = std::filesystem::path(*cwd);
3741
native_path.append(path);
3842
}
3943
return native_path.lexically_normal();
@@ -83,17 +87,9 @@ LocationId GetLocationId(InMemoryIndex& index,
8387
end_line = start_line;
8488
}
8589

86-
llvm::ErrorOr<std::string> cwd = source_manager.getFileManager()
87-
.getVirtualFileSystem()
88-
.getCurrentWorkingDirectory();
89-
if (!cwd) {
90-
std::cerr << "unable to get cwd\n";
91-
exit(1);
92-
}
93-
9490
if (IsRealPath(path)) {
9591
// This is a real file path, so normalize it.
96-
path = CleanPath(path, *cwd);
92+
path = ToNormalizedAbsolutePath(path, source_manager);
9793
}
9894
return index.GetLocationId({path, start_line, end_line});
9995
}

infra/indexer/frontend/common.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,21 @@
1515
#ifndef OSS_FUZZ_INFRA_INDEXER_FRONTEND_COMMON_H_
1616
#define OSS_FUZZ_INFRA_INDEXER_FRONTEND_COMMON_H_
1717

18+
#include <string>
19+
1820
#include "indexer/index/in_memory_index.h"
1921
#include "indexer/index/types.h"
22+
#include "absl/strings/string_view.h"
2023
#include "clang/Basic/SourceLocation.h"
24+
#include "clang/Basic/SourceManager.h"
2125

2226
namespace oss_fuzz {
2327
namespace indexer {
28+
// Converts a source-level `path` into a normalized absolute form suitable for
29+
// passing to the indexer as a location path.
30+
std::string ToNormalizedAbsolutePath(
31+
absl::string_view path, const clang::SourceManager& source_manager);
32+
2433
// Converts a pair of `SourceLocation` to a `LocationId` for a location in the
2534
// index.
2635
LocationId GetLocationId(InMemoryIndex& index,

infra/indexer/frontend/frontend_test.cc

Lines changed: 42 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,9 +100,11 @@ TEST(ParseCommandLineTest, HashInsideDoubleQuotes) {
100100
} // namespace frontend_internal
101101

102102
namespace {
103+
typedef void (*TExtraSourceTreeAction)(const std::filesystem::path&);
104+
103105
std::unique_ptr<InMemoryIndex> GetSnippetIndex(
104106
std::string code, const std::vector<std::string>& extra_args = {},
105-
bool fail_on_error = false) {
107+
bool fail_on_error = false, TExtraSourceTreeAction extra_action = nullptr) {
106108
auto source_dir = std::filesystem::path(::testing::TempDir()) / "src";
107109
std::filesystem::remove_all(source_dir);
108110
CHECK(std::filesystem::create_directory(source_dir));
@@ -114,14 +116,16 @@ std::unique_ptr<InMemoryIndex> GetSnippetIndex(
114116
std::string source_file_path = (source_dir / "snippet.cc").string();
115117
std::string source_dir_path = source_dir.string();
116118

119+
if (extra_action != nullptr) {
120+
extra_action(source_dir);
121+
}
122+
117123
auto index_dir = std::filesystem::path(::testing::TempDir()) / "idx";
118124
std::filesystem::remove_all(index_dir);
119125
CHECK(std::filesystem::create_directory(index_dir));
120126
std::string index_dir_path = index_dir.string();
121127
std::string sysroot_path = "/";
122-
123128
FileCopier file_copier(source_dir_path, index_dir_path, {sysroot_path});
124-
125129
std::unique_ptr<MergeQueue> merge_queue = MergeQueue::Create(1);
126130
auto index_action = std::make_unique<IndexAction>(file_copier, *merge_queue);
127131
const bool result = clang::tooling::runToolOnCodeWithArgs(
@@ -3863,5 +3867,40 @@ TEST(FrontendTest, AliasedSymbol) {
38633867
EXPECT_HAS_ENTITY(index, Entity::Kind::kFunction, "", "bar", "()",
38643868
"snippet.cc", 2, 2);
38653869
}
3870+
3871+
TEST(FrontendTest, GhostFileLocations) {
3872+
FlatIndex index =
3873+
std::move(
3874+
*GetSnippetIndex(
3875+
/*code=*/"#include \"ghostfile.h\"\n",
3876+
/*extra_args=*/{},
3877+
/*fail_on_error=*/true,
3878+
/*extra_action=*/
3879+
[](const std::filesystem::path& source_dir) {
3880+
std::ofstream ghost_file(source_dir / "ghostfile.h");
3881+
ghost_file
3882+
<< "// Copyright 2025 Google Inc. All rights reserved.";
3883+
CHECK(ghost_file.good());
3884+
}))
3885+
.Export();
3886+
3887+
bool found_self = false;
3888+
bool found_include = false;
3889+
bool found_other = false;
3890+
for (const Location& location : index.locations) {
3891+
if (location.is_whole_file()) {
3892+
if (location.path().ends_with("snippet.cc")) {
3893+
found_self = true;
3894+
} else if (location.path().ends_with("ghostfile.h")) {
3895+
found_include = true;
3896+
}
3897+
} else if (location.is_real()) {
3898+
found_other = true;
3899+
}
3900+
}
3901+
EXPECT_TRUE(found_self);
3902+
EXPECT_TRUE(found_include);
3903+
EXPECT_FALSE(found_other);
3904+
}
38663905
} // namespace indexer
38673906
} // namespace oss_fuzz

infra/indexer/frontend/index_action.cc

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,16 +20,19 @@
2020
#include <vector>
2121

2222
#include "indexer/frontend/ast_visitor.h"
23+
#include "indexer/frontend/common.h"
2324
#include "indexer/frontend/pp_callbacks.h"
2425
#include "indexer/index/file_copier.h"
2526
#include "indexer/index/in_memory_index.h"
27+
#include "indexer/index/types.h"
2628
#include "indexer/merge_queue.h"
2729
#include "absl/flags/flag.h"
2830
#include "absl/log/check.h"
2931
#include "absl/strings/match.h"
3032
#include "absl/strings/string_view.h"
3133
#include "clang/AST/ASTConsumer.h"
3234
#include "clang/Frontend/CompilerInstance.h"
35+
#include "clang/Frontend/Utils.h"
3336
#include "clang/Lex/Pragma.h"
3437
#include "clang/Lex/Preprocessor.h"
3538
#include "llvm/ADT/StringRef.h"
@@ -62,7 +65,10 @@ IndexAction::IndexAction(FileCopier& file_copier, MergeQueue& merge_queue)
6265
bool IndexAction::BeginSourceFileAction(clang::CompilerInstance& compiler) {
6366
CHECK(index_);
6467

68+
dependencies_collector_ = std::make_unique<AllDependenciesCollector>();
69+
6570
clang::Preprocessor& preprocessor = compiler.getPreprocessor();
71+
dependencies_collector_->attachToPreprocessor(preprocessor);
6672
preprocessor.addPPCallbacks(
6773
std::make_unique<PpCallbacks>(*index_, compiler.getSourceManager()));
6874
for (const std::string& ignored_pragma :
@@ -75,7 +81,24 @@ bool IndexAction::BeginSourceFileAction(clang::CompilerInstance& compiler) {
7581
return !absl::EndsWith(compiler.getFrontendOpts().Inputs[0].getFile(), ".S");
7682
}
7783

78-
void IndexAction::EndSourceFileAction() { merge_queue_.Add(std::move(index_)); }
84+
void IndexAction::EndSourceFileAction() {
85+
const clang::SourceManager& source_manager =
86+
getCompilerInstance().getSourceManager();
87+
for (const std::string& filename :
88+
dependencies_collector_->getDependencies()) {
89+
if (!IsRealPath(filename)) {
90+
continue;
91+
}
92+
const auto absolute_path =
93+
ToNormalizedAbsolutePath(filename, source_manager);
94+
// Create a "whole file" location per filename to make sure files without
95+
// indexed symbols are still copied and e.g. accounted for in deltas.
96+
index_->GetLocationId(Location::WholeFile(absolute_path));
97+
}
98+
dependencies_collector_.reset();
99+
100+
merge_queue_.Add(std::move(index_));
101+
}
79102

80103
std::unique_ptr<clang::ASTConsumer> IndexAction::CreateASTConsumer(
81104
clang::CompilerInstance& compiler, llvm::StringRef path) {

infra/indexer/frontend/index_action.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,18 @@
2222
#include "indexer/merge_queue.h"
2323
#include "clang/AST/ASTConsumer.h"
2424
#include "clang/Frontend/FrontendAction.h"
25+
#include "clang/Frontend/Utils.h"
2526
#include "clang/Tooling/Tooling.h"
2627
#include "llvm/ADT/StringRef.h"
2728

2829
namespace oss_fuzz {
2930
namespace indexer {
31+
class AllDependenciesCollector : public clang::DependencyCollector {
32+
public:
33+
// Also include files from the "system" locations.
34+
bool needSystemDependencies() override { return true; }
35+
};
36+
3037
// IndexAction provides the entry-point for the indexing tooling. This should
3138
// typically not be used directly, and the functions exposed in
3239
// indexer/frontend.h should be used instead.
@@ -43,6 +50,7 @@ class IndexAction : public clang::ASTFrontendAction {
4350
private:
4451
std::unique_ptr<InMemoryIndex> index_;
4552
MergeQueue& merge_queue_;
53+
std::unique_ptr<AllDependenciesCollector> dependencies_collector_;
4654
};
4755

4856
class IndexActionFactory : public clang::tooling::FrontendActionFactory {

infra/indexer/index/in_memory_index.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ class InMemoryIndex {
4949
// The `GetXxxId` functions return the id of an existing, matching object if
5050
// there is already one in the index, or allocate a new id if there is not an
5151
// identical object in the index.
52-
// `GetLocationId` expects a location with an absolute path if not built-in.
52+
// `GetLocationId` expects a location with an absolute path if not built-in;
53+
// use `ToNormalizedAbsolutePath` to obtain one.
5354
LocationId GetLocationId(Location location);
5455
EntityId GetEntityId(const Entity& entity);
5556
const Entity& GetEntityById(EntityId entity_id) const;

infra/indexer/index/types.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,11 +53,18 @@ class Location {
5353
public:
5454
Location(absl::string_view path, uint32_t start_line, uint32_t end_line);
5555

56+
static Location WholeFile(absl::string_view path) {
57+
return Location(path, /*start_line=*/0, /*end_line=*/0);
58+
}
59+
5660
inline const std::string& path() const { return path_; }
5761
inline uint32_t start_line() const { return start_line_; }
5862
inline uint32_t end_line() const { return end_line_; }
5963

6064
inline bool is_real() const { return IsRealPath(path()); }
65+
inline bool is_whole_file() const {
66+
return start_line_ == 0 && end_line_ == 0;
67+
}
6168

6269
private:
6370
friend class InMemoryIndex;

0 commit comments

Comments
 (0)