Skip to content

Commit 7f1d927

Browse files
authored
Merge pull request #14360 from lovesegfault/scan-for-references-detailed
feat(libstore): add scanForReferencesDeep and use it for why-depends
2 parents ea17cc1 + 6129aee commit 7f1d927

File tree

4 files changed

+328
-45
lines changed

4 files changed

+328
-45
lines changed

src/libstore-tests/references.cc

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
#include "nix/store/references.hh"
2+
#include "nix/store/path-references.hh"
3+
#include "nix/util/memory-source-accessor.hh"
24

35
#include <gtest/gtest.h>
46

@@ -79,4 +81,145 @@ TEST(references, scan)
7981
}
8082
}
8183

84+
TEST(references, scanForReferencesDeep)
{
    using File = MemorySourceAccessor::File;

    // Store paths whose hash parts are planted in the fixture tree below.
    StorePath fooPath{"dc04vv14dak1c1r48qa0m23vr9jy8sm0-foo"};
    StorePath barPath{"zc842j0rz61mjsp3h3wp5ly71ak6qgdn-bar"};
    StorePath bazPath{"a5cn2i4b83gnsm60d38l3kgb8qfplm11-baz"};

    StorePathSet candidates{fooPath, barPath, bazPath};

    std::string_view fooHash = fooPath.hashPart();
    std::string_view barHash = barPath.hashPart();
    std::string_view bazHash = bazPath.hashPart();

    // In-memory fixture:
    //   /file1.txt         mentions foo
    //   /file2.txt         mentions bar and baz
    //   /file3.txt         mentions nothing
    //   /subdir/file4.txt  mentions foo again, one level down
    //   /link1             symlink whose *target* mentions bar
    auto tree = make_ref<MemorySourceAccessor>();
    tree->root = File::Directory{
        .contents{
            {"file1.txt", File::Regular{.contents = "This file references " + fooHash + " in its content"}},
            {"file2.txt", File::Regular{.contents = "Multiple refs: " + barHash + " and also " + bazHash}},
            {"file3.txt", File::Regular{.contents = "This file has no store path references at all"}},
            {
                "subdir",
                File::Directory{
                    .contents{
                        {"file4.txt", File::Regular{.contents = "Subdirectory file with " + fooHash}},
                    },
                },
            },
            {"link1", File::Symlink{.target = barHash + "-target"}},
        },
    };

    // Keys under which the scanner is expected (or not expected) to report.
    const CanonPath file1("/file1.txt");
    const CanonPath file2("/file2.txt");
    const CanonPath file3("/file3.txt");
    const CanonPath file4("/subdir/file4.txt");
    const CanonPath link1("/link1");

    // Callback flavour: collect every reported entry into a map ourselves.
    {
        std::map<CanonPath, StorePathSet> seen;

        scanForReferencesDeep(*tree, CanonPath::root, candidates, [&](FileRefScanResult hit) {
            seen[std::move(hit.filePath)] = std::move(hit.foundRefs);
        });

        // Exactly the four entries that mention a hash: file1, file2, file4, link1.
        EXPECT_EQ(seen.size(), 4);

        // /file1.txt -> {foo}
        {
            auto pos = seen.find(file1);
            ASSERT_TRUE(pos != seen.end());
            EXPECT_EQ(pos->second.size(), 1);
            EXPECT_TRUE(pos->second.count(fooPath));
        }

        // /file2.txt -> {bar, baz}
        {
            auto pos = seen.find(file2);
            ASSERT_TRUE(pos != seen.end());
            EXPECT_EQ(pos->second.size(), 2);
            EXPECT_TRUE(pos->second.count(barPath));
            EXPECT_TRUE(pos->second.count(bazPath));
        }

        // /file3.txt has no references, so it must not be reported at all.
        {
            EXPECT_FALSE(seen.count(file3));
        }

        // /subdir/file4.txt -> {foo}
        {
            auto pos = seen.find(file4);
            ASSERT_TRUE(pos != seen.end());
            EXPECT_EQ(pos->second.size(), 1);
            EXPECT_TRUE(pos->second.count(fooPath));
        }

        // /link1 -> {bar}, found in the symlink target
        {
            auto pos = seen.find(link1);
            ASSERT_TRUE(pos != seen.end());
            EXPECT_EQ(pos->second.size(), 1);
            EXPECT_TRUE(pos->second.count(barPath));
        }
    }

    // Map-returning flavour: same expectations through the convenience overload.
    {
        auto byFile = scanForReferencesDeep(*tree, CanonPath::root, candidates);

        // file1, file2, file4, link1
        EXPECT_EQ(byFile.size(), 4);

        // Presence / absence of each key.
        EXPECT_TRUE(byFile.count(file1));
        EXPECT_TRUE(byFile.count(file2));
        EXPECT_TRUE(byFile.count(file4));
        EXPECT_TRUE(byFile.count(link1));
        EXPECT_FALSE(byFile.count(file3));

        // Exact reference set recorded for each key.
        EXPECT_EQ(byFile.at(file1), StorePathSet{fooPath});
        EXPECT_EQ(byFile.at(file2), StorePathSet({barPath, bazPath}));
        EXPECT_EQ(byFile.at(file4), StorePathSet{fooPath});
        EXPECT_EQ(byFile.at(link1), StorePathSet{barPath});
    }
}
224+
82225
} // namespace nix

src/libstore/include/nix/store/path-references.hh

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33

44
#include "nix/store/references.hh"
55
#include "nix/store/path.hh"
6+
#include "nix/util/source-accessor.hh"
7+
8+
#include <functional>
9+
#include <vector>
610

711
namespace nix {
812

@@ -21,4 +25,57 @@ public:
2125
StorePathSet getResultPaths();
2226
};
2327

28+
/**
29+
* Result of scanning a single file for references.
30+
*/
31+
struct FileRefScanResult
32+
{
33+
CanonPath filePath; ///< The file that was scanned
34+
StorePathSet foundRefs; ///< Which store paths were found in this file
35+
};
36+
37+
/**
38+
* Scan a store path tree and report which references appear in which files.
39+
*
40+
* This is like scanForReferences() but provides per-file granularity.
41+
* Useful for cycle detection and detailed dependency analysis like `nix why-depends --precise`.
42+
*
43+
* The function walks the tree using the provided accessor and streams each file's
44+
* contents through a RefScanSink to detect hash references. For each file that
45+
* contains at least one reference, a callback is invoked with the file path and
46+
* the set of references found.
47+
*
48+
* Note: This function only searches for the hash part of store paths (e.g.,
49+
* "dc04vv14dak1c1r48qa0m23vr9jy8sm0"), not the name part. A store path like
50+
* "/nix/store/dc04vv14dak1c1r48qa0m23vr9jy8sm0-foo" will be detected if the
51+
* hash appears anywhere in the scanned content, regardless of the "-foo" suffix.
52+
*
53+
* @param accessor Source accessor to read the tree
54+
* @param rootPath Root path to scan
55+
* @param refs Set of store paths to search for
56+
* @param callback Called for each file that contains at least one reference
57+
*/
58+
void scanForReferencesDeep(
59+
SourceAccessor & accessor,
60+
const CanonPath & rootPath,
61+
const StorePathSet & refs,
62+
std::function<void(FileRefScanResult)> callback);
63+
64+
/**
65+
* Scan a store path tree and return which references appear in which files.
66+
*
67+
* This is a convenience wrapper around the callback-based scanForReferencesDeep()
68+
* that collects all results into a map for efficient lookups.
69+
*
70+
* Note: This function only searches for the hash part of store paths, not the name part.
71+
* See the callback-based overload for details.
72+
*
73+
* @param accessor Source accessor to read the tree
74+
* @param rootPath Root path to scan
75+
* @param refs Set of store paths to search for
76+
* @return Map from file paths to the set of references found in each file
77+
*/
78+
std::map<CanonPath, StorePathSet>
79+
scanForReferencesDeep(SourceAccessor & accessor, const CanonPath & rootPath, const StorePathSet & refs);
80+
2481
} // namespace nix

src/libstore/path-references.cc

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
11
#include "nix/store/path-references.hh"
22
#include "nix/util/hash.hh"
33
#include "nix/util/archive.hh"
4+
#include "nix/util/source-accessor.hh"
5+
#include "nix/util/canon-path.hh"
6+
#include "nix/util/logging.hh"
47

58
#include <map>
69
#include <cstdlib>
710
#include <mutex>
811
#include <algorithm>
12+
#include <functional>
913

1014
namespace nix {
1115

@@ -54,4 +58,90 @@ StorePathSet scanForReferences(Sink & toTee, const Path & path, const StorePathS
5458
return refsSink.getResultPaths();
5559
}
5660

61+
/* Callback-based deep scan: visits every entry below `rootPath` and invokes
   `callback` for each regular file or symlink that mentions at least one of
   `refs`. Throws Error on any file type other than regular, directory or
   symlink. */
void scanForReferencesDeep(
    SourceAccessor & accessor,
    const CanonPath & rootPath,
    const StorePathSet & refs,
    std::function<void(FileRefScanResult)> callback)
{
    /* Depth-first visitor.

       Every regular file and every symlink gets its own PathRefScanSink.
       A sink remembers each hash it has matched so far; if one sink were
       shared between entries, a hash seen in an earlier entry could not be
       attributed to a later entry that contains it as well. */
    auto visit = [&](this auto & recurse, const CanonPath & current) -> void {
        const auto kind = accessor.lstat(current).type;

        if (kind == SourceAccessor::tDirectory) {
            // Descend into every child; the entry type from the listing is not needed here.
            auto children = accessor.readDirectory(current);
            for (const auto & [childName, childType] : children)
                recurse(current / childName);
            return;
        }

        if (kind == SourceAccessor::tRegular) {
            // Stream the file contents through a sink dedicated to this file.
            PathRefScanSink scanner = PathRefScanSink::fromPaths(refs);
            accessor.readFile(current, scanner);

            auto hits = scanner.getResultPaths();
            if (hits.empty())
                return; // nothing to report for this file

            debug("scanForReferencesDeep: found %d references in %s", hits.size(), current.abs());
            callback(FileRefScanResult{.filePath = current, .foundRefs = std::move(hits)});
            return;
        }

        if (kind == SourceAccessor::tSymlink) {
            // The link is not followed: only the target string itself is scanned.
            PathRefScanSink scanner = PathRefScanSink::fromPaths(refs);
            auto linkTarget = accessor.readLink(current);
            scanner(std::string_view(linkTarget));

            auto hits = scanner.getResultPaths();
            if (hits.empty())
                return; // nothing to report for this symlink

            debug("scanForReferencesDeep: found %d references in symlink %s", hits.size(), current.abs());
            callback(FileRefScanResult{.filePath = current, .foundRefs = std::move(hits)});
            return;
        }

        // Char/block devices, sockets, FIFOs and unknown types are all rejected.
        throw Error("file '%s' has an unsupported type", current.abs());
    };

    // Kick off the traversal at the requested root.
    visit(rootPath);
}
134+
135+
std::map<CanonPath, StorePathSet>
136+
scanForReferencesDeep(SourceAccessor & accessor, const CanonPath & rootPath, const StorePathSet & refs)
137+
{
138+
std::map<CanonPath, StorePathSet> results;
139+
140+
scanForReferencesDeep(accessor, rootPath, refs, [&](FileRefScanResult result) {
141+
results[std::move(result.filePath)] = std::move(result.foundRefs);
142+
});
143+
144+
return results;
145+
}
146+
57147
} // namespace nix

0 commit comments

Comments
 (0)