Skip to content

Commit 888180a

Browse files
committed
Fix filesystem etag: use SHA-256 of content instead of st_mtime_ns
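The effective mtime resolution is insufficient on fast CI runners: although st_mtime_ns is expressed in nanoseconds, the filesystem/kernel updates timestamps at a coarser granularity, so two rapid writes can land within the same timestamp tick and the etag appears unchanged. A SHA-256 digest of the file content is stable and deterministic, and it differs whenever the content changes (barring a hash collision).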
1 parent 4528858 commit 888180a

File tree

1 file changed

+15
-9
lines changed

1 file changed

+15
-9
lines changed

jqueue/adapters/storage/filesystem.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,12 @@
99
1010
Etag strategy
1111
-------------
12-
The etag is the file's mtime in nanoseconds (st_mtime_ns), stringified.
13-
A file with st_size == 0 (or absent) is treated as non-existent; its etag
14-
is None. The jqueue codec always produces non-empty JSON, so a 0-byte file
15-
only occurs transiently before the first write completes.
12+
The etag is a SHA-256 hex digest of the file contents. This is stable,
13+
deterministic, and always changes when content changes — unlike mtime, which
14+
can be identical across rapid successive writes on fast machines.
15+
A file that is absent or empty is treated as non-existent; its etag is None.
16+
The jqueue codec always produces non-empty JSON, so a 0-byte file only occurs
17+
transiently before the first write completes.
1618
1719
CAS semantics
1820
-------------
@@ -28,6 +30,7 @@
2830
import asyncio
2931
import dataclasses
3032
import fcntl
33+
import hashlib
3134
import os
3235
from pathlib import Path
3336

@@ -65,17 +68,20 @@ async def write(
6568
# Synchronous implementations (executed in a thread-pool worker) #
6669
# ------------------------------------------------------------------ #
6770

71+
@staticmethod
72+
def _etag(data: bytes) -> str:
73+
return hashlib.sha256(data).hexdigest()
74+
6875
def _sync_read(self) -> tuple[bytes, str | None]:
6976
if not self.path.exists():
7077
return b"", None
7178
with open(self.path, "rb") as fh:
7279
fcntl.flock(fh, fcntl.LOCK_SH)
7380
try:
7481
content = fh.read()
75-
stat = os.fstat(fh.fileno())
7682
finally:
7783
fcntl.flock(fh, fcntl.LOCK_UN)
78-
etag: str | None = str(stat.st_mtime_ns) if content else None
84+
etag: str | None = self._etag(content) if content else None
7985
return content, etag
8086

8187
def _sync_write(self, content: bytes, if_match: str | None) -> str:
@@ -84,8 +90,8 @@ def _sync_write(self, content: bytes, if_match: str | None) -> str:
8490
try:
8591
fcntl.flock(fd, fcntl.LOCK_EX)
8692

87-
stat = os.fstat(fd)
88-
real_etag: str | None = str(stat.st_mtime_ns) if stat.st_size > 0 else None
93+
existing = os.read(fd, os.fstat(fd).st_size)
94+
real_etag: str | None = self._etag(existing) if existing else None
8995

9096
if real_etag != if_match:
9197
raise CASConflictError(
@@ -99,4 +105,4 @@ def _sync_write(self, content: bytes, if_match: str | None) -> str:
99105
fcntl.flock(fd, fcntl.LOCK_UN)
100106
os.close(fd)
101107

102-
return str(os.stat(str(self.path)).st_mtime_ns)
108+
return self._etag(content)

0 commit comments

Comments
 (0)