Skip to content

Commit 34cd92c

Browse files
OSS-Fuzz Team authored and
copybara-github committed
Expose indexer DB schema versioning
PiperOrigin-RevId: 782506523
1 parent 12fae5a commit 34cd92c

File tree

1 file changed

+38
-3
lines changed

1 file changed

+38
-3
lines changed

infra/base-images/base-builder/indexer/manifest_types.py

Lines changed: 38 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,8 @@
4545
OBJ_DIR = pathlib.Path("obj")
4646
# Directory for indexer data.
4747
INDEX_DIR = pathlib.Path("idx")
48+
# The index database filename.
49+
INDEX_DB = pathlib.Path("db.sqlite")
4850
# Library directory, where shared libraries are copied - inside obj.
4951
LIB_DIR = OBJ_DIR / "lib"
5052
# Manifest location
@@ -54,7 +56,7 @@
5456
# Min archive version we currently support.
5557
_MIN_SUPPORTED_ARCHIVE_VERSION = 1
5658
# The current version of the build archive format.
57-
ARCHIVE_VERSION = 4
59+
ARCHIVE_VERSION = 5
5860
# OSS-Fuzz $OUT dir.
5961
OUT = pathlib.Path(os.getenv("OUT", "/out"))
6062
# OSS-Fuzz coverage info.
@@ -209,6 +211,26 @@ def from_dict(cls, config_dict: Mapping[Any, Any]) -> Self:
209211

210212

211213

214+
def _get_sqlite_db_user_version(sqlite_db_path: pathlib.Path) -> int:
215+
"""Retrieves `PRAGMA user_version;` value without connecting to the database."""
216+
with sqlite_db_path.open("rb") as stream:
217+
# https://www.sqlite.org/pragma.html#pragma_user_version - a big-endian
218+
# 32-bit number at offset 60 of the database header.
219+
too_small_error = ValueError(
220+
f"The file '{sqlite_db_path}' is too small for an SQLite database."
221+
)
222+
try:
223+
stream.seek(60)
224+
except OSError as e:
225+
raise too_small_error from e
226+
227+
version_bytes = stream.read(4)
228+
if len(version_bytes) < 4:
229+
raise too_small_error
230+
231+
return int.from_bytes(version_bytes, byteorder="big")
232+
233+
212234
@dataclasses.dataclass(frozen=True)
213235
class Manifest:
214236
"""Contains general meta-information about the snapshot."""
@@ -245,6 +267,9 @@ class Manifest:
245267
# Version of the manifest spec.
246268
version: int = ARCHIVE_VERSION
247269

270+
# Version of the index database schema.
271+
index_db_version: int | None = None
272+
248273
@classmethod
249274
def from_dict(cls, data: dict[str, Any]) -> Self:
250275
"""Creates a Manifest object from a deserialized dict."""
@@ -282,6 +307,7 @@ def from_dict(cls, data: dict[str, Any]) -> Self:
282307
)
283308
return Manifest(
284309
version=version,
310+
index_db_version=data.get("index_db_version"),
285311
name=data["name"],
286312
uuid=data["uuid"],
287313
lib_mount_path=lib_mount_path,
@@ -365,7 +391,7 @@ def save_build(
365391
archive_path: pathlib.PurePath,
366392
out_dir: pathlib.PurePath = pathlib.Path("/out"),
367393
overwrite: bool = True,
368-
) -> None:
394+
) -> Self:
369395
"""Saves a build archive with this Manifest."""
370396
if os.path.exists(archive_path) and not overwrite:
371397
raise FileExistsError(f"Not overwriting existing archive {archive_path}")
@@ -416,13 +442,20 @@ def _save_dir(
416442
arcname=prefix + str(file.relative_to(path)),
417443
)
418444

445+
dumped_self = self
446+
if self.index_db_version is None:
447+
index_db_version = _get_sqlite_db_user_version(index_dir / INDEX_DB)
448+
dumped_self = dataclasses.replace(
449+
self, index_db_version=index_db_version
450+
)
451+
419452
# Make sure the manifest is the first file in the archive to avoid
420453
# seeking when we only need the manifest.
421454
_add_string_to_tar(
422455
tar,
423456
MANIFEST_PATH.as_posix(),
424457
json.dumps(
425-
self.to_dict(),
458+
dumped_self.to_dict(),
426459
indent=2,
427460
),
428461
)
@@ -452,6 +485,8 @@ def _save_dir(
452485

453486
shutil.copyfile(tmp.name, archive_path)
454487

488+
return dumped_self
489+
455490

456491
def report_missing_source_files(
457492
binary_name: str, copied_files: list[str], tar: tarfile.TarFile):

0 commit comments

Comments
 (0)