Skip to content

Commit d51c16e

Browse files
Fix test failures and update to new type system
Source code fixes: - Add download_path setting and squeeze handling in fetch.py - Add filename collision handling in AttachType and XAttachType - Fix is_blob detection to check both BLOB and NATIVE_BLOB patterns - Fix FilepathType.validate to accept Path objects - Add proper error message for undecorated tables Test infrastructure updates: - Update schema_external.py to use new <xblob@store>, <xattach@store>, <filepath@store> syntax - Update all test tables to use <djblob> instead of longblob for serialization - Configure object_storage.stores in conftest.py fixtures - Remove obsolete test_admin.py (set_password was removed) - Fix connection passing in various tests to avoid credential prompts - Fix test_query_caching to handle existing directories README: - Add Developer Guide section with setup, test, and pre-commit instructions Test results: 408 passed, 2 skipped (macOS multiprocessing limitation) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <[email protected]>
1 parent 09d1f1d commit d51c16e

20 files changed

+277
-246
lines changed

README.md

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,3 +141,66 @@ DataJoint (<https://datajoint.com>).
141141
- [Contribution Guidelines](https://docs.datajoint.com/about/contribute/)
142142

143143
- [Developer Guide](https://docs.datajoint.com/core/datajoint-python/latest/develop/)
144+
145+
## Developer Guide
146+
147+
### Prerequisites
148+
149+
- [Docker](https://docs.docker.com/get-docker/) for running MySQL and MinIO services
150+
- [pixi](https://prefix.dev/docs/pixi/overview) package manager (or pip/conda)
151+
152+
### Setting Up the Development Environment
153+
154+
1. Clone the repository and install dependencies:
155+
156+
```bash
157+
git clone https://github.com/datajoint/datajoint-python.git
158+
cd datajoint-python
159+
pixi install
160+
```
161+
162+
2. Start the required services (MySQL and MinIO):
163+
164+
```bash
165+
docker compose up -d db minio
166+
```
167+
168+
### Running Tests
169+
170+
Run tests with pytest using the test environment:
171+
172+
```bash
173+
DJ_HOST=localhost DJ_PORT=3306 S3_ENDPOINT=localhost:9000 python -m pytest tests/
174+
```
175+
176+
Or run specific test files:
177+
178+
```bash
179+
DJ_HOST=localhost DJ_PORT=3306 S3_ENDPOINT=localhost:9000 python -m pytest tests/test_blob.py -v
180+
```
181+
182+
### Running Pre-commit Checks
183+
184+
Pre-commit hooks ensure code quality before commits. Install and run them:
185+
186+
```bash
187+
# Install pre-commit hooks
188+
pre-commit install
189+
190+
# Run all pre-commit checks manually
191+
pre-commit run --all-files
192+
193+
# Run specific hooks
194+
pre-commit run ruff --all-files
195+
pre-commit run mypy --all-files
196+
```
197+
198+
### Environment Variables
199+
200+
| Variable | Default | Description |
201+
|----------|---------|-------------|
202+
| `DJ_HOST` | `localhost` | MySQL server hostname |
203+
| `DJ_PORT` | `3306` | MySQL server port |
204+
| `DJ_USER` | `datajoint` | MySQL username |
205+
| `DJ_PASS` | `datajoint` | MySQL password |
206+
| `S3_ENDPOINT` | `localhost:9000` | MinIO/S3 endpoint |

src/datajoint/builtin_types.py

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -523,7 +523,22 @@ def decode(self, stored: bytes, *, key: dict | None = None) -> str:
523523
download_path.mkdir(parents=True, exist_ok=True)
524524
local_path = download_path / filename
525525

526-
local_path.write_bytes(contents)
526+
# Handle filename collision - if file exists with different content, add suffix
527+
if local_path.exists():
528+
existing_contents = local_path.read_bytes()
529+
if existing_contents != contents:
530+
# Find unique filename
531+
stem = local_path.stem
532+
suffix = local_path.suffix
533+
counter = 1
534+
while local_path.exists() and local_path.read_bytes() != contents:
535+
local_path = download_path / f"{stem}_{counter}{suffix}"
536+
counter += 1
537+
538+
# Only write if file doesn't exist or has different content
539+
if not local_path.exists():
540+
local_path.write_bytes(contents)
541+
527542
return str(local_path)
528543

529544
def validate(self, value: Any) -> None:
@@ -626,7 +641,22 @@ def decode(self, stored: bytes, *, key: dict | None = None) -> str:
626641
download_path.mkdir(parents=True, exist_ok=True)
627642
local_path = download_path / filename
628643

629-
local_path.write_bytes(contents)
644+
# Handle filename collision - if file exists with different content, add suffix
645+
if local_path.exists():
646+
existing_contents = local_path.read_bytes()
647+
if existing_contents != contents:
648+
# Find unique filename
649+
stem = local_path.stem
650+
suffix = local_path.suffix
651+
counter = 1
652+
while local_path.exists() and local_path.read_bytes() != contents:
653+
local_path = download_path / f"{stem}_{counter}{suffix}"
654+
counter += 1
655+
656+
# Only write if file doesn't exist or has different content
657+
if not local_path.exists():
658+
local_path.write_bytes(contents)
659+
630660
return str(local_path)
631661

632662
def validate(self, value: Any) -> None:
@@ -741,6 +771,8 @@ def decode(self, stored: dict, *, key: dict | None = None) -> Any:
741771
return ObjectRef.from_json(stored, backend=backend)
742772

743773
def validate(self, value: Any) -> None:
744-
"""Validate that value is a path string."""
745-
if not isinstance(value, str):
746-
raise TypeError(f"<filepath> expects a path string, got {type(value).__name__}")
774+
"""Validate that value is a path string or Path object."""
775+
from pathlib import Path
776+
777+
if not isinstance(value, (str, Path)):
778+
raise TypeError(f"<filepath> expects a path string or Path, got {type(value).__name__}")

src/datajoint/fetch.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,11 @@ def _get(connection, attr, data, squeeze, download_path):
4949
:param attr: attribute from the table's heading
5050
:param data: raw value fetched from the database
5151
:param squeeze: if True squeeze blobs (legacy, unused)
52-
:param download_path: for fetches that download data (legacy, unused in simplified model)
52+
:param download_path: for fetches that download data (attachments, filepaths)
5353
:return: decoded data
5454
"""
55+
from .settings import config
56+
5557
if data is None:
5658
return None
5759

@@ -69,9 +71,19 @@ def _get(connection, attr, data, squeeze, download_path):
6971
elif final_dtype.lower() == "binary(16)":
7072
data = uuid_module.UUID(bytes=data)
7173

72-
# Apply decoders in reverse order: innermost first, then outermost
73-
for attr_type in reversed(type_chain):
74-
data = attr_type.decode(data, key=None)
74+
# Temporarily set download_path for types that need it (attachments, filepaths)
75+
original_download_path = config.get("download_path", ".")
76+
config["download_path"] = str(download_path)
77+
try:
78+
# Apply decoders in reverse order: innermost first, then outermost
79+
for attr_type in reversed(type_chain):
80+
data = attr_type.decode(data, key=None)
81+
finally:
82+
config["download_path"] = original_download_path
83+
84+
# Apply squeeze for blob types (removes singleton dimensions from arrays)
85+
if squeeze and isinstance(data, np.ndarray):
86+
data = data.squeeze()
7587

7688
return data
7789

src/datajoint/heading.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -332,8 +332,8 @@ def _init_from_database(self):
332332
attr["type"] = attr["adapter"].dtype
333333
if not any(r.match(attr["type"]) for r in TYPE_PATTERN.values()):
334334
raise DataJointError(f"Invalid dtype '{attr['type']}' in attribute type <{adapter_name}>.")
335-
# Update is_blob based on resolved dtype
336-
attr["is_blob"] = bool(TYPE_PATTERN["BLOB"].match(attr["type"]))
335+
# Update is_blob based on resolved dtype (check both BLOB and NATIVE_BLOB patterns)
336+
attr["is_blob"] = any(TYPE_PATTERN[t].match(attr["type"]) for t in ("BLOB", "NATIVE_BLOB"))
337337

338338
# Handle core type aliases (uuid, float32, etc.)
339339
if special:

src/datajoint/settings.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,9 @@ class Config(BaseSettings):
267267
cache: Path | None = None
268268
query_cache: Path | None = None
269269

270+
# Download path for attachments and filepaths
271+
download_path: str = "."
272+
270273
# Internal: track where config was loaded from
271274
_config_path: Path | None = None
272275
_secrets_dir: Path | None = None
@@ -675,6 +678,29 @@ def __setitem__(self, key: str, value: Any) -> None:
675678
obj = getattr(obj, part)
676679
setattr(obj, parts[-1], value)
677680

681+
def __delitem__(self, key: str) -> None:
682+
"""Reset setting to default by dot-notation key."""
683+
# Get the default value from the model fields
684+
parts = key.split(".")
685+
if len(parts) == 1:
686+
field_info = self.model_fields.get(key)
687+
if field_info is not None:
688+
default = field_info.default
689+
if default is not None:
690+
setattr(self, key, default)
691+
elif field_info.default_factory is not None:
692+
setattr(self, key, field_info.default_factory())
693+
else:
694+
setattr(self, key, None)
695+
else:
696+
raise KeyError(f"Setting '{key}' not found")
697+
else:
698+
# For nested settings, reset to None or empty
699+
obj: Any = self
700+
for part in parts[:-1]:
701+
obj = getattr(obj, part)
702+
setattr(obj, parts[-1], None)
703+
678704
def get(self, key: str, default: Any = None) -> Any:
679705
"""Get setting with optional default value."""
680706
try:

src/datajoint/table.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,11 @@ def full_table_name(self):
254254
"""
255255
:return: full table name in the schema
256256
"""
257+
if self.database is None or self.table_name is None:
258+
raise DataJointError(
259+
f"Class {self.__class__.__name__} is not associated with a schema. "
260+
"Apply a schema decorator or use schema() to bind it."
261+
)
257262
return r"`{0:s}`.`{1:s}`".format(self.database, self.table_name)
258263

259264
@property

tests/conftest.py

Lines changed: 55 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -201,33 +201,56 @@ def connection_test(connection_root, prefix, db_creds_test):
201201

202202
@pytest.fixture(scope="session")
203203
def stores_config(s3_creds, tmpdir_factory):
204+
"""Configure object storage stores for tests."""
204205
return {
205-
"raw": dict(protocol="file", location=tmpdir_factory.mktemp("raw")),
206+
"raw": dict(protocol="file", location=str(tmpdir_factory.mktemp("raw"))),
206207
"repo": dict(
207-
stage=tmpdir_factory.mktemp("repo"),
208+
stage=str(tmpdir_factory.mktemp("repo")),
208209
protocol="file",
209-
location=tmpdir_factory.mktemp("repo"),
210+
location=str(tmpdir_factory.mktemp("repo")),
210211
),
211212
"repo-s3": dict(
212-
s3_creds,
213213
protocol="s3",
214+
endpoint=s3_creds["endpoint"],
215+
access_key=s3_creds["access_key"],
216+
secret_key=s3_creds["secret_key"],
217+
bucket=s3_creds.get("bucket", "datajoint-test"),
214218
location="dj/repo",
215-
stage=tmpdir_factory.mktemp("repo-s3"),
219+
stage=str(tmpdir_factory.mktemp("repo-s3")),
220+
secure=False, # MinIO runs without SSL in tests
221+
),
222+
"local": dict(protocol="file", location=str(tmpdir_factory.mktemp("local"))),
223+
"share": dict(
224+
protocol="s3",
225+
endpoint=s3_creds["endpoint"],
226+
access_key=s3_creds["access_key"],
227+
secret_key=s3_creds["secret_key"],
228+
bucket=s3_creds.get("bucket", "datajoint-test"),
229+
location="dj/store/repo",
230+
secure=False, # MinIO runs without SSL in tests
216231
),
217-
"local": dict(protocol="file", location=tmpdir_factory.mktemp("local"), subfolding=(1, 1)),
218-
"share": dict(s3_creds, protocol="s3", location="dj/store/repo", subfolding=(2, 4)),
219232
}
220233

221234

222235
@pytest.fixture
223236
def mock_stores(stores_config):
224-
og_stores_config = dj.config.get("stores")
225-
dj.config["stores"] = stores_config
237+
"""Configure object storage stores for tests using new object_storage system."""
238+
# Save original configuration
239+
og_project_name = dj.config.object_storage.project_name
240+
og_stores = dict(dj.config.object_storage.stores)
241+
242+
# Set test configuration
243+
dj.config.object_storage.project_name = "djtest"
244+
dj.config.object_storage.stores.clear()
245+
for name, config in stores_config.items():
246+
dj.config.object_storage.stores[name] = config
247+
226248
yield
227-
if og_stores_config is None:
228-
del dj.config["stores"]
229-
else:
230-
dj.config["stores"] = og_stores_config
249+
250+
# Restore original configuration
251+
dj.config.object_storage.project_name = og_project_name
252+
dj.config.object_storage.stores.clear()
253+
dj.config.object_storage.stores.update(og_stores)
231254

232255

233256
@pytest.fixture
@@ -663,31 +686,29 @@ def object_storage_config(tmpdir_factory):
663686

664687

665688
@pytest.fixture
666-
def mock_object_storage(object_storage_config, monkeypatch):
689+
def mock_object_storage(object_storage_config):
667690
"""Mock object storage configuration in datajoint config."""
668-
original_object_storage = getattr(dj.config, "_object_storage", None)
669-
670-
class MockObjectStorageSettings:
671-
def __init__(self, config):
672-
self.project_name = config["project_name"]
673-
self.protocol = config["protocol"]
674-
self.location = config["location"]
675-
self.token_length = config.get("token_length", 8)
676-
self.partition_pattern = config.get("partition_pattern")
677-
self.bucket = config.get("bucket")
678-
self.endpoint = config.get("endpoint")
679-
self.access_key = config.get("access_key")
680-
self.secret_key = config.get("secret_key")
681-
self.secure = config.get("secure", True)
682-
self.container = config.get("container")
683-
684-
mock_settings = MockObjectStorageSettings(object_storage_config)
685-
monkeypatch.setattr(dj.config, "object_storage", mock_settings)
691+
# Save original values
692+
original = {
693+
"project_name": dj.config.object_storage.project_name,
694+
"protocol": dj.config.object_storage.protocol,
695+
"location": dj.config.object_storage.location,
696+
"token_length": dj.config.object_storage.token_length,
697+
}
698+
699+
# Set test values
700+
dj.config.object_storage.project_name = object_storage_config["project_name"]
701+
dj.config.object_storage.protocol = object_storage_config["protocol"]
702+
dj.config.object_storage.location = object_storage_config["location"]
703+
dj.config.object_storage.token_length = object_storage_config.get("token_length", 8)
686704

687705
yield object_storage_config
688706

689-
if original_object_storage is not None:
690-
monkeypatch.setattr(dj.config, "object_storage", original_object_storage)
707+
# Restore original values
708+
dj.config.object_storage.project_name = original["project_name"]
709+
dj.config.object_storage.protocol = original["protocol"]
710+
dj.config.object_storage.location = original["location"]
711+
dj.config.object_storage.token_length = original["token_length"]
691712

692713

693714
@pytest.fixture

tests/schema.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -200,8 +200,8 @@ class Channel(dj.Part):
200200
-> master
201201
channel :tinyint unsigned # channel number within Ephys
202202
----
203-
voltage : longblob
204-
current = null : longblob # optional current to test null handling
203+
voltage : <djblob>
204+
current = null : <djblob> # optional current to test null handling
205205
"""
206206

207207
def _make_tuples(self, key):
@@ -228,7 +228,7 @@ class Image(dj.Manual):
228228
# table for testing blob inserts
229229
id : int # image identifier
230230
---
231-
img : longblob # image
231+
img : <djblob> # image
232232
"""
233233

234234

@@ -454,7 +454,7 @@ class Longblob(dj.Manual):
454454
definition = """
455455
id: int
456456
---
457-
data: longblob
457+
data: <djblob>
458458
"""
459459

460460

tests/schema_alter.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ class Experiment(dj.Imported):
2020
experiment_id :smallint # experiment number for this subject
2121
---
2222
data_path : int # some number
23-
extra=null : longblob # just testing
23+
extra=null : <djblob> # just testing
2424
-> [nullable] User
2525
subject_notes=null :varchar(2048) # {notes} e.g. purpose of experiment
2626
entry_time=CURRENT_TIMESTAMP :timestamp # automatic timestamp

0 commit comments

Comments
 (0)