Skip to content

Commit be872c7

Browse files
committed
Fix detection of files that were modified since last indexing
Xapian's `sortable_unserialise' returned a float, which caused rounding errors that could force some files to be always treated as modified. * bdx/binary.py (BinaryDirectory): Change `last_mtime' datetime attribute to integer `last_mtime_ns'. (BinaryDirectory.changed_files): Compare mtimes as integers. * bdx/index.py (SymbolIndex.mtime): Return nanosecond mtime, exactly as stored in the database. (index_binary_directory): Pass nanosecond mtime to `BinaryDirectory'. * tests/test_binary.py (setup_tmp_dir): Return nanosecond mtime. (test_find_changed_files): (test_find_deleted_files): Pass nanosecond mtime. * tests/test_index.py (_compile_file): New helper. (test_indexing_triggered_after_file_was_updated): (test_indexing_not_triggered_if_mtime_not_changed): New tests.
1 parent 8db6904 commit be872c7

File tree

4 files changed

+100
-21
lines changed

4 files changed

+100
-21
lines changed

bdx/binary.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -521,7 +521,7 @@ class BinaryDirectory:
521521

522522
path: Path
523523
exclusions: Collection[Exclusion] = field(default_factory=list)
524-
last_mtime: datetime = datetime.fromtimestamp(0)
524+
last_mtime_ns: int = 0
525525
previous_file_list: list[Path] = field(repr=False, default_factory=list)
526526
use_compilation_database: bool = False
527527

@@ -551,16 +551,17 @@ def changed_files(self) -> Iterator[Path]:
551551
desc="Finding changed files",
552552
leave=False,
553553
):
554-
mtime = datetime.fromtimestamp(path.stat().st_mtime)
554+
st = path.stat()
555555
is_new = path not in previous_state
556-
is_changed = not is_new and self.last_mtime < mtime
556+
is_changed = not is_new and self.last_mtime_ns < st.st_mtime_ns
557557

558558
trace(
559-
"{}: is_new={} is_changed={} mtime={}",
559+
"{}: is_new={} is_changed={} mtime={} ({})",
560560
path,
561561
is_new,
562562
is_changed,
563-
mtime,
563+
datetime.fromtimestamp(st.st_mtime),
564+
st.st_mtime_ns,
564565
)
565566

566567
if is_new or is_changed:

bdx/index.py

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
from collections.abc import Mapping
1212
from contextlib import contextmanager
1313
from dataclasses import asdict, dataclass, field
14-
from datetime import datetime
1514
from enum import Enum
1615
from functools import cache
1716
from pathlib import Path
@@ -688,16 +687,27 @@ def set_metadata(self, key: str, metadata: bytes):
688687
raise ValueError(msg)
689688
self._live_writable_db().set_metadata(key, metadata)
690689

691-
def mtime(self) -> datetime:
692-
"""Return the max modification time of this index."""
690+
def mtime(self) -> int:
691+
"""Return the max modification time of this index in nanoseconds."""
693692
db = self._live_db()
694693
field_data = self.schema["mtime"]
695-
val = db.get_value_upper_bound(field_data.slot) # pyright: ignore
694+
slot = field_data.slot # pyright: ignore
695+
val = db.get_value_upper_bound(slot)
696696
if not val:
697-
return datetime.fromtimestamp(0)
697+
return 0
698+
699+
# Search for symbol which has the known highest mtime. We
700+
# can't just use Xapian.sortable_unserialise() on ``val``, as
701+
# that converts the value to a float, and we lose precision,
702+
# causing errors and unexpected results, e.g. files won't be
703+
# treated as modified even though they were.
698704

699-
val_int = xapian.sortable_unserialise(val)
700-
return datetime.fromtimestamp(val_int / 1e9)
705+
enquire = xapian.Enquire(db)
706+
query = xapian.Query(xapian.Query.OP_VALUE_RANGE, slot, val, val)
707+
enquire.set_query(query)
708+
mset = enquire.get_mset(0, 1)
709+
results = self.MatchResults(mset.size(), mset)
710+
return list(results)[0].mtime
701711

702712
def binary_dir(self) -> Optional[Path]:
703713
"""Get binary directory of this index, set by ``set_binary_dir``."""
@@ -1083,14 +1093,14 @@ def index_binary_directory(
10831093
exclusions.append(excl)
10841094

10851095
if reindex:
1086-
mtime = datetime.fromtimestamp(0)
1096+
mtime_ns = 0
10871097
else:
1088-
mtime = index.mtime()
1098+
mtime_ns = index.mtime()
10891099
existing_files = set(index.all_files())
10901100
bdir = BinaryDirectory(
10911101
path=bindir_path,
10921102
exclusions=exclusions,
1093-
last_mtime=mtime,
1103+
last_mtime_ns=mtime_ns,
10941104
previous_file_list=list(existing_files),
10951105
use_compilation_database=use_compilation_database,
10961106
)

tests/test_binary.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,20 +23,22 @@ def setup_tmp_dir(
2323
file_list: list[Path],
2424
files_to_delete: Optional[list[Path]] = None,
2525
files_to_modify: Optional[list[Path]] = None,
26-
) -> tuple[list[Path], list[Path], list[Path], datetime]:
26+
) -> tuple[list[Path], list[Path], list[Path], int]:
2727
if files_to_delete is None:
2828
files_to_delete = []
2929
if files_to_modify is None:
3030
files_to_modify = []
3131

32-
max_mtime = datetime.now() - timedelta(seconds=1)
32+
mtime = datetime.now() - timedelta(seconds=1)
33+
mtime_ns = 0
3334
for f in file_list:
34-
create_fake_elf_file(f, max_mtime)
35+
create_fake_elf_file(f, mtime)
36+
mtime_ns = f.stat().st_mtime_ns
3537
for f in files_to_delete:
3638
f.unlink()
3739
for f in files_to_modify:
3840
f.touch()
39-
return file_list, files_to_delete, files_to_modify, max_mtime
41+
return file_list, files_to_delete, files_to_modify, mtime_ns
4042

4143

4244
def shuffled_int_range(stop):
@@ -101,7 +103,7 @@ def test_find_changed_files(tmp_path):
101103

102104
bdir = BinaryDirectory(
103105
tmp_path,
104-
last_mtime=max_mtime,
106+
last_mtime_ns=max_mtime,
105107
previous_file_list=file_list,
106108
)
107109
deleted_files = list(bdir.deleted_files())
@@ -139,7 +141,7 @@ def test_find_deleted_files(tmp_path):
139141

140142
bdir = BinaryDirectory(
141143
tmp_path,
142-
last_mtime=max_mtime,
144+
last_mtime_ns=max_mtime,
143145
previous_file_list=file_list,
144146
)
145147
changed_files = list(bdir.changed_files())

tests/test_index.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1+
import os
12
import shutil
23
from pathlib import Path
34
from shutil import rmtree
5+
from subprocess import check_call
46

57
import pytest
68

@@ -17,6 +19,16 @@
1719
# isort: on
1820

1921

22+
def _compile_file(
23+
output_file: Path, source: str, flags: list[str] | None = None, ext="c"
24+
):
25+
if not flags:
26+
flags = []
27+
source_file = output_file.parent / f"{output_file.stem}.{ext}"
28+
source_file.write_text(source)
29+
check_call(["gcc", str(source_file), "-o", str(output_file), *flags])
30+
31+
2032
def test_indexing(fixture_path, tmp_path):
2133
index_path = tmp_path / "index"
2234
index_binary_directory(
@@ -400,6 +412,60 @@ def test_indexing_delete_saved_filters(fixture_path, tmp_path):
400412
assert any([p.name.endswith(".cpp.o") for p in paths])
401413

402414

415+
def test_indexing_triggered_after_file_was_updated(tmp_path):
416+
dir = tmp_path / "build"
417+
dir.mkdir()
418+
419+
file: Path = dir / "foo.o"
420+
index_path = tmp_path / "index"
421+
422+
_compile_file(file, "void foo() {}", ["-c"])
423+
index_binary_directory(dir, index_path, IndexingOptions())
424+
with SymbolIndex.open(index_path, readonly=True) as index:
425+
symbols = {s.name: s for s in index.search("*:*")}
426+
assert "foo" in symbols
427+
428+
_compile_file(file, "void bar() {}", ["-c"])
429+
index_binary_directory(dir, index_path, IndexingOptions())
430+
with SymbolIndex.open(index_path, readonly=True) as index:
431+
symbols = {s.name: s for s in index.search("*:*")}
432+
assert "foo" not in symbols
433+
assert "bar" in symbols
434+
435+
st = file.stat()
436+
_compile_file(file, "void quux() {}", ["-c"])
437+
os.utime(file, ns=(st.st_atime_ns + 1, st.st_mtime_ns + 1))
438+
index_binary_directory(dir, index_path, IndexingOptions())
439+
with SymbolIndex.open(index_path, readonly=True) as index:
440+
symbols = {s.name: s for s in index.search("*:*")}
441+
assert "foo" not in symbols
442+
assert "bar" not in symbols
443+
assert "quux" in symbols
444+
445+
446+
def test_indexing_not_triggered_if_mtime_not_changed(tmp_path):
447+
dir = tmp_path / "build"
448+
dir.mkdir()
449+
450+
file: Path = dir / "foo.o"
451+
index_path = tmp_path / "index"
452+
453+
_compile_file(file, "void foo() {}", ["-c"])
454+
index_binary_directory(dir, index_path, IndexingOptions())
455+
with SymbolIndex.open(index_path, readonly=True) as index:
456+
symbols = {s.name: s for s in index.search("*:*")}
457+
assert "foo" in symbols
458+
459+
st = file.stat()
460+
_compile_file(file, "void quux() {}", ["-c"])
461+
os.utime(file, ns=(st.st_atime_ns, st.st_mtime_ns))
462+
index_binary_directory(dir, index_path, IndexingOptions())
463+
with SymbolIndex.open(index_path, readonly=True) as index:
464+
symbols = {s.name: s for s in index.search("*:*")}
465+
assert "foo" in symbols
466+
assert "quux" not in symbols
467+
468+
403469
def test_searching_by_wildcard(readonly_index):
404470
symbols = set(readonly_index.search("name:a_*"))
405471
assert symbols

0 commit comments

Comments
 (0)