Skip to content

Commit 2e4a78b

Browse files
committed
Fix files with very long paths always being treated as unindexed
* bdx/index.py (SymbolIndex.all_files): Rework to yield all paths properly even if their db terms have been truncated due to length limits. * tests/test_index.py (test_indexing_very_long_path): New test.
1 parent 34f3fcd commit 2e4a78b

File tree

2 files changed

+41
-3
lines changed

2 files changed

+41
-3
lines changed

bdx/index.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -762,10 +762,26 @@ def delete_file(self, file: Path):
762762

763763
def all_files(self) -> Iterator[Path]:
764764
"""Yield all the files indexed in this SymbolIndex."""
765+
seen_paths = set()
766+
765767
for value in self.iter_prefix("path", ""):
766-
path = Path(value)
767-
if path.is_absolute():
768-
yield path
768+
# Check if the path stored in database has been truncated
769+
# because its length exceeded MAX_TERM_SIZE. If so, then
770+
# we need to search this term by wildcard and return all
771+
# paths that are actually stored in the data of each
772+
# document.
773+
raw_term = self.schema["path"].prefix.encode() + value.encode()
774+
if len(raw_term) == MAX_TERM_SIZE:
775+
query = f"path:{value}*"
776+
results = self.search(query)
777+
paths = set([x.path for x in results]).difference(seen_paths)
778+
seen_paths.update(paths)
779+
yield from paths
780+
else:
781+
path = Path(value)
782+
if path.is_absolute() and path not in seen_paths:
783+
seen_paths.add(path)
784+
yield path
769785

770786
def iter_prefix(self, field: str, value_prefix: str) -> Iterator[str]:
771787
"""Return all the possible values for ``field`` with given prefix."""

tests/test_index.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
# isort: off
1010
from bdx.binary import Exclusion, SymbolType
1111
from bdx.index import (
12+
MAX_TERM_SIZE,
1213
IndexingOptions,
1314
SymbolIndex,
1415
SymbolNameField,
@@ -462,6 +463,27 @@ def test_indexing_not_triggered_if_mtime_not_changed(tmp_path):
462463
assert "quux" not in symbols
463464

464465

466+
def test_indexing_very_long_path(tmp_path):
467+
index_path = tmp_path / "index"
468+
dir = tmp_path / "build"
469+
dir.mkdir()
470+
471+
filename = "0" * (os.pathconf(tmp_path, "PC_NAME_MAX") - 2) + ".o"
472+
file: Path = dir / filename
473+
short_file: Path = dir / "short.o"
474+
475+
assert (
476+
len(str(file)) + len(SymbolIndex.SCHEMA["path"].prefix) > MAX_TERM_SIZE
477+
)
478+
479+
_compile_file(file, "void foo() {}", ["-c"])
480+
_compile_file(short_file, "void shortfile() {}", ["-c"])
481+
index_binary_directory(dir, index_path, IndexingOptions())
482+
with SymbolIndex.open(index_path, readonly=True) as index:
483+
all_files = sorted(index.all_files())
484+
assert all_files == [file, short_file]
485+
486+
465487
def test_searching_by_wildcard(readonly_index):
466488
symbols = set(readonly_index.search("name:a_*"))
467489
assert symbols

0 commit comments

Comments
 (0)