Skip to content

Commit d2ab4de

Browse files
refactor: Remove uuid_from_buffer, use hashlib directly for query cache
- Remove uuid_from_buffer from hash.py (dead code)
- connection.py now uses hashlib.md5().hexdigest() directly
- Update test_hash.py to test key_hash instead

Co-Authored-By: Claude Opus 4.5 <[email protected]>
1 parent 0d1ffe7 commit d2ab4de

File tree

3 files changed

+7
-19
lines changed

3 files changed

+7
-19
lines changed

src/datajoint/connection.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
from __future__ import annotations
77

8+
import hashlib
89
import logging
910
import pathlib
1011
import re
@@ -18,7 +19,6 @@
1819
from . import errors
1920
from .blob import pack, unpack
2021
from .dependencies import Dependencies
21-
from .hash import uuid_from_buffer
2222
from .settings import config
2323
from .version import __version__
2424

@@ -418,7 +418,7 @@ def query(
418418
if use_query_cache:
419419
if not config[cache_key]:
420420
raise errors.DataJointError(f"Provide filepath dj.config['{cache_key}'] when using query caching.")
421-
hash_ = uuid_from_buffer((str(self._query_cache) + re.sub(r"`\$\w+`", "", query)).encode() + pack(args))
421+
hash_ = hashlib.md5((str(self._query_cache) + re.sub(r"`\$\w+`", "", query)).encode() + pack(args)).hexdigest()
422422
cache_path = pathlib.Path(config[cache_key]) / str(hash_)
423423
try:
424424
buffer = cache_path.read_bytes()

src/datajoint/hash.py

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from __future__ import annotations
22

33
import hashlib
4-
import uuid
54
from typing import Any
65

76

@@ -14,16 +13,3 @@ def key_hash(mapping: dict[str, Any]) -> str:
1413
for k, v in sorted(mapping.items()):
1514
hashed.update(str(v).encode())
1615
return hashed.hexdigest()
17-
18-
19-
def uuid_from_buffer(buffer: bytes = b"", *, init_string: str = "") -> uuid.UUID:
20-
"""
21-
Compute MD5 hash of buffer data, returned as UUID.
22-
23-
:param buffer: bytes to hash
24-
:param init_string: string to initialize the checksum (for namespacing)
25-
:return: UUID based on MD5 digest
26-
"""
27-
hashed = hashlib.md5(init_string.encode())
28-
hashed.update(buffer)
29-
return uuid.UUID(bytes=hashed.digest())

tests/unit/test_hash.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from datajoint import hash
22

33

4-
def test_hash():
5-
assert hash.uuid_from_buffer(b"abc").hex == "900150983cd24fb0d6963f7d28e17f72"
6-
assert hash.uuid_from_buffer(b"").hex == "d41d8cd98f00b204e9800998ecf8427e"
4+
def test_key_hash():
5+
"""Test that key_hash produces consistent MD5 hex digests."""
6+
assert hash.key_hash({"a": 1, "b": 2}) == hash.key_hash({"b": 2, "a": 1})
7+
assert hash.key_hash({"x": "hello"}) == "5d41402abc4b2a76b9719d911017c592"
8+
assert hash.key_hash({}) == "d41d8cd98f00b204e9800998ecf8427e"

0 commit comments

Comments
 (0)