diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 6267cd6f1..a4a91448f 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -1,37 +1,55 @@ name: Test + on: push: branches: - - "**" # every branch - - "!gh-pages" # exclude gh-pages branch - - "!stage*" # exclude branches beginning with stage + - "**" + - "!gh-pages" + - "!stage*" paths: - - "src/datajoint" - - "tests" + - "src/datajoint/**" + - "tests/**" + - "pyproject.toml" + - "pixi.lock" + - ".github/workflows/test.yaml" pull_request: branches: - - "**" # every branch - - "!gh-pages" # exclude gh-pages branch - - "!stage*" # exclude branches beginning with stage + - "**" + - "!gh-pages" + - "!stage*" paths: - - "src/datajoint" - - "tests" + - "src/datajoint/**" + - "tests/**" + - "pyproject.toml" + - "pixi.lock" + - ".github/workflows/test.yaml" + jobs: test: runs-on: ubuntu-latest - strategy: - matrix: - py_ver: ["3.10", "3.11", "3.12", "3.13"] - mysql_ver: ["8.0"] steps: - uses: actions/checkout@v4 - - name: Set up Python ${{matrix.py_ver}} - uses: actions/setup-python@v5 + + - name: Set up pixi + uses: prefix-dev/setup-pixi@v0.9.3 with: - python-version: ${{matrix.py_ver}} - - name: Integration test - env: - MYSQL_VER: ${{matrix.mysql_ver}} - run: | - pip install -e ".[test]" - pytest --cov-report term-missing --cov=datajoint tests + cache: true + locked: false + + - name: Run tests + run: pixi run -e test test-cov + + # Unit tests run without containers (faster feedback) + unit-tests: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up pixi + uses: prefix-dev/setup-pixi@v0.9.3 + with: + cache: true + locked: false + + - name: Run unit tests + run: pixi run -e test pytest tests/unit -v diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6c38487d2..f2bd59004 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -36,6 +36,24 @@ repos: hooks: # lint github actions workflow yaml - id: actionlint - -## Suggest to add pytest hook that runs unit test | Prerequisite: split unit/integration test -## https://github.com/datajoint/datajoint-python/issues/1211 +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.14.1 + hooks: + - id: mypy + files: ^src/datajoint/ + additional_dependencies: + - pydantic + - pydantic-settings + - types-PyMySQL + - types-tqdm + - pandas-stubs + - numpy +- repo: local + hooks: + - id: unit-tests + name: unit tests + entry: pytest tests/unit/ -v --tb=short + language: system + pass_filenames: false + always_run: true + stages: [pre-commit] diff --git a/README.md b/README.md index 85c3269e7..40a1a2c7c 100644 --- a/README.md +++ b/README.md @@ -100,23 +100,25 @@ Scientific data includes both structured metadata and large data objects (time s ### Prerequisites - [Docker](https://docs.docker.com/get-docker/) (Docker daemon must be running) -- Python 3.10+ +- [pixi](https://pixi.sh) (recommended) or Python 3.10+ -### Quick Start +### Quick Start with pixi (Recommended) + +[pixi](https://pixi.sh) manages all dependencies including Python, graphviz, and test tools: ```bash -# Clone and install +# Clone the repo git clone https://github.com/datajoint/datajoint-python.git cd datajoint-python -pip install -e ".[test]" -# Run all tests (containers start automatically via testcontainers) -pytest tests/ +# Install dependencies and run tests (containers managed by testcontainers) +pixi run test -# Install and run pre-commit hooks -pip install pre-commit -pre-commit install -pre-commit run --all-files +# Run 
with coverage +pixi run test-cov + +# Run pre-commit hooks +pixi run pre-commit run --all-files ``` ### Running Tests @@ -126,16 +128,30 @@ Tests use [testcontainers](https://testcontainers.com/) to automatically manage ```bash # Run all tests (recommended) -pytest tests/ +pixi run test # Run with coverage report -pytest --cov-report term-missing --cov=datajoint tests/ +pixi run test-cov + +# Run only unit tests (no containers needed) +pixi run -e test pytest tests/unit/ # Run specific test file -pytest tests/integration/test_blob.py -v +pixi run -e test pytest tests/integration/test_blob.py -v +``` -# Run only unit tests (no containers needed) -pytest tests/unit/ +**macOS Docker Desktop users:** If tests fail to connect to Docker, set `DOCKER_HOST`: +```bash +export DOCKER_HOST=unix://$HOME/.docker/run/docker.sock +``` + +### Alternative: Using pip + +If you prefer pip over pixi: + +```bash +pip install -e ".[test]" +pytest tests/ ``` ### Alternative: External Containers @@ -147,7 +163,8 @@ For development/debugging, you may prefer persistent containers that survive tes docker compose up -d db minio # Run tests using external containers -DJ_USE_EXTERNAL_CONTAINERS=1 pytest tests/ +DJ_USE_EXTERNAL_CONTAINERS=1 pixi run test +# Or with pip: DJ_USE_EXTERNAL_CONTAINERS=1 pytest tests/ # Stop containers when done docker compose down @@ -161,15 +178,6 @@ Run tests entirely in Docker (no local Python needed): docker compose --profile test up djtest --build ``` -### Alternative: Using pixi - -[pixi](https://pixi.sh) users can run tests with: - -```bash -pixi install # First time setup -pixi run test # Runs tests (testcontainers manages containers) -``` - ### Pre-commit Hooks Pre-commit hooks run automatically on `git commit` to check code quality. @@ -177,15 +185,14 @@ Pre-commit hooks run automatically on `git commit` to check code quality. ```bash # Install hooks (first time only) -pip install pre-commit -pre-commit install +pixi run pre-commit install +# Or with pip: pip install pre-commit && pre-commit install # Run all checks manually -pre-commit run --all-files +pixi run pre-commit run --all-files # Run specific hook -pre-commit run ruff --all-files -pre-commit run codespell --all-files +pixi run pre-commit run ruff --all-files ``` Hooks include: @@ -196,9 +203,9 @@ Hooks include: ### Before Submitting a PR -1. **Run all tests**: `pytest tests/` -2. **Run pre-commit**: `pre-commit run --all-files` -3. **Check coverage**: `pytest --cov-report term-missing --cov=datajoint tests/` +1. **Run all tests**: `pixi run test` +2. **Run pre-commit**: `pixi run pre-commit run --all-files` +3. **Check coverage**: `pixi run test-cov` ### Environment Variables diff --git a/RELEASE_MEMO.md b/RELEASE_MEMO.md new file mode 100644 index 000000000..25fdc6ca0 --- /dev/null +++ b/RELEASE_MEMO.md @@ -0,0 +1,117 @@ +# DataJoint 2.0 Release Memo + +## PyPI Release Process + +### Steps + +1. **Run "Manual Draft Release" workflow** on GitHub Actions +2. **Edit the draft release**: + - Change release name to `Release 2.0.0` + - Change tag to `v2.0.0` +3. **Publish the release** +4. Automation will: + - Update `version.py` to `2.0.0` + - Build and publish to PyPI + - Create PR to merge version update back to master + +### Version Note + +The release drafter computes version from the previous tag (`v0.14.6`), so it would generate `0.14.7` or `0.15.0`. You must **manually edit** the release name to include `2.0.0`. 
+ +The regex on line 42 of `post_draft_release_published.yaml` extracts version from the release name: +```bash +VERSION=$(echo "${{ github.event.release.name }}" | grep -oP '\d+\.\d+\.\d+') +``` + +--- + +## Conda-Forge Release Process + +DataJoint has a [conda-forge feedstock](https://github.com/conda-forge/datajoint-feedstock). + +### How Conda-Forge Updates Work + +Conda-forge has **automated bots** that detect new PyPI releases and create PRs automatically: + +1. **You publish to PyPI** (via the GitHub release workflow) +2. **regro-cf-autotick-bot** detects the new version within ~24 hours +3. **Bot creates a PR** to the feedstock with updated version and hash +4. **Maintainers review and merge** (you're listed as a maintainer) +5. **Package builds automatically** for all platforms + +### Manual Update (if bot doesn't trigger) + +If the bot doesn't create a PR, manually update the feedstock: + +1. **Fork** [conda-forge/datajoint-feedstock](https://github.com/conda-forge/datajoint-feedstock) + +2. **Edit `recipe/meta.yaml`**: + ```yaml + {% set version = "2.0.0" %} + + package: + name: datajoint + version: {{ version }} + + source: + url: https://pypi.io/packages/source/d/datajoint/datajoint-{{ version }}.tar.gz + sha256: + + build: + number: 0 # Reset to 0 for new version + ``` + +3. **Get the SHA256 hash**: + ```bash + curl -sL https://pypi.org/pypi/datajoint/2.0.0/json | jq -r '.urls[] | select(.packagetype=="sdist") | .digests.sha256' + ``` + +4. **Update license** (important for 2.0!): + ```yaml + about: + license: Apache-2.0 # Changed from LGPL-2.1-only + license_file: LICENSE + ``` + +5. **Submit PR** to the feedstock + +### Action Items for 2.0 Release + +1. **First**: Publish to PyPI via GitHub release (name it "Release 2.0.0") +2. **Wait**: ~24 hours for conda-forge bot to detect +3. **Check**: [datajoint-feedstock PRs](https://github.com/conda-forge/datajoint-feedstock/pulls) for auto-PR +4. **Review**: Ensure license changed from LGPL to Apache-2.0 +5. **Merge**: As maintainer, approve and merge the PR + +### Timeline + +| Step | When | +|------|------| +| PyPI release | Day 0 | +| Bot detects & creates PR | Day 0-1 | +| Review & merge PR | Day 1-2 | +| Conda-forge package available | Day 1-2 | + +### Verification + +After release: +```bash +conda search datajoint -c conda-forge +# Should show 2.0.0 +``` + +--- + +## Maintainers + +- @datajointbot +- @dimitri-yatsenko +- @drewyangdev +- @guzman-raphael +- @ttngu207 + +## Links + +- [datajoint-feedstock on GitHub](https://github.com/conda-forge/datajoint-feedstock) +- [datajoint on Anaconda.org](https://anaconda.org/conda-forge/datajoint) +- [datajoint on PyPI](https://pypi.org/project/datajoint/) diff --git a/docs/src/archive/manipulation/delete.md b/docs/src/archive/manipulation/delete.md index 4e34c69ce..e61e8a2b8 100644 --- a/docs/src/archive/manipulation/delete.md +++ b/docs/src/archive/manipulation/delete.md @@ -26,6 +26,6 @@ Entities in a [part table](../design/tables/master-part.md) are usually removed consequence of deleting the master table. To enforce this workflow, calling `delete` directly on a part table produces an error. -In some cases, it may be necessary to override this behavior. -To remove entities from a part table without calling `delete` master, use the argument `force_parts=True`. -To include the corresponding entries in the master table, use the argument `force_masters=True`. 
+In some cases, it may be necessary to override this behavior using the `part_integrity` parameter: +- `part_integrity="ignore"`: Remove entities from a part table without deleting from master (breaks integrity). +- `part_integrity="cascade"`: Delete from parts and also cascade up to delete the corresponding master entries. diff --git a/pyproject.toml b/pyproject.toml index f3eee2313..ac3af18df 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -152,6 +152,60 @@ quote-style = "double" indent-style = "space" line-ending = "auto" +[tool.mypy] +python_version = "3.10" +ignore_missing_imports = true +# Start with lenient settings, gradually enable stricter checks +warn_return_any = false +warn_unused_ignores = false +disallow_untyped_defs = false +disallow_incomplete_defs = false +check_untyped_defs = true + +# Modules with complete type coverage - strict checking enabled +[[tool.mypy.overrides]] +module = [ + "datajoint.content_registry", +] +disallow_untyped_defs = true +disallow_incomplete_defs = true +warn_return_any = true + +# Modules excluded from type checking until fully typed +[[tool.mypy.overrides]] +module = [ + "datajoint.admin", + "datajoint.autopopulate", + "datajoint.blob", + "datajoint.builtin_codecs", + "datajoint.cli", + "datajoint.codecs", + "datajoint.condition", + "datajoint.connection", + "datajoint.declare", + "datajoint.dependencies", + "datajoint.diagram", + "datajoint.errors", + "datajoint.expression", + "datajoint.gc", + "datajoint.hash", + "datajoint.heading", + "datajoint.jobs", + "datajoint.lineage", + "datajoint.logging", + "datajoint.migrate", + "datajoint.objectref", + "datajoint.preview", + "datajoint.schemas", + "datajoint.settings", + "datajoint.staged_insert", + "datajoint.storage", + "datajoint.table", + "datajoint.user_tables", + "datajoint.utils", +] +ignore_errors = true + [tool.setuptools] packages = ["datajoint"] package-dir = {"" = "src"} @@ -180,6 +234,12 @@ platforms = ["linux-64", "osx-arm64", "linux-aarch64"] [tool.pixi.pypi-dependencies] datajoint = { path = ".", editable = true } +[tool.pixi.feature.test.pypi-dependencies] +datajoint = { path = ".", editable = true, extras = ["test"] } + +[tool.pixi.feature.dev.pypi-dependencies] +datajoint = { path = ".", editable = true, extras = ["dev", "test"] } + [tool.pixi.environments] default = { solve-group = "default" } dev = { features = ["dev"], solve-group = "default" } diff --git a/src/datajoint/autopopulate.py b/src/datajoint/autopopulate.py index 6c4539760..80c669079 100644 --- a/src/datajoint/autopopulate.py +++ b/src/datajoint/autopopulate.py @@ -93,27 +93,40 @@ class AutoPopulate: _allow_insert = False _jobs = None - @property - def jobs(self) -> Job: - """ - Access the job table for this auto-populated table. + class _JobsDescriptor: + """Descriptor allowing jobs access on both class and instance.""" - The job table (``~~table_name``) is created lazily on first access. - It tracks job status, priority, scheduling, and error information - for distributed populate operations. + def __get__(self, obj, objtype=None): + """ + Access the job table for this auto-populated table. - Returns - ------- - Job - Job management object for this table. - """ - if self._jobs is None: - from .jobs import Job + The job table (``~~table_name``) is created lazily on first access. + It tracks job status, priority, scheduling, and error information + for distributed populate operations. 
+ + Can be accessed on either the class or an instance:: + + # Both work equivalently + Analysis.jobs.refresh() + Analysis().jobs.refresh() + + Returns + ------- + Job + Job management object for this table. + """ + if obj is None: + # Accessed on class - instantiate first + obj = objtype() + if obj._jobs is None: + from .jobs import Job + + obj._jobs = Job(obj) + if not obj._jobs.is_declared: + obj._jobs.declare() + return obj._jobs - self._jobs = Job(self) - if not self._jobs.is_declared: - self._jobs.declare() - return self._jobs + jobs: Job = _JobsDescriptor() def _declare_check(self, primary_key: list[str], fk_attribute_map: dict[str, tuple[str, str]]) -> None: """ @@ -474,8 +487,8 @@ def handler(signum, frame): if refresh: self.jobs.refresh(*restrictions, priority=priority) - # Fetch pending jobs ordered by priority - pending_query = self.jobs.pending & "scheduled_time <= NOW()" + # Fetch pending jobs ordered by priority (use NOW(3) to match CURRENT_TIMESTAMP(3) precision) + pending_query = self.jobs.pending & "scheduled_time <= NOW(3)" if priority is not None: pending_query = pending_query & f"priority <= {priority}" diff --git a/src/datajoint/condition.py b/src/datajoint/condition.py index 8ab19ca5d..ea7d7a504 100644 --- a/src/datajoint/condition.py +++ b/src/datajoint/condition.py @@ -107,32 +107,73 @@ class Top: ---------- limit : int, optional Maximum number of rows to return. Default 1. - order_by : str or list[str], optional - Attributes to order by. ``"KEY"`` for primary key. Default ``"KEY"``. + order_by : str or list[str] or None, optional + Attributes to order by. ``"KEY"`` for primary key order. + ``None`` means inherit ordering from an existing Top (or default to KEY). + Default ``"KEY"``. offset : int, optional Number of rows to skip. Default 0. + + Examples + -------- + >>> query & dj.Top(5) # Top 5 by primary key + >>> query & dj.Top(10, 'score DESC') # Top 10 by score descending + >>> query & dj.Top(10, order_by=None) # Top 10, inherit existing order + >>> query & dj.Top(5, offset=10) # Skip 10, take 5 """ limit: int | None = 1 - order_by: str | list[str] = "KEY" + order_by: str | list[str] | None = "KEY" offset: int = 0 def __post_init__(self) -> None: - self.order_by = self.order_by or ["KEY"] self.offset = self.offset or 0 if self.limit is not None and not isinstance(self.limit, int): raise TypeError("Top limit must be an integer") - if not isinstance(self.order_by, (str, collections.abc.Sequence)) or not all( - isinstance(r, str) for r in self.order_by - ): - raise TypeError("Top order_by attributes must all be strings") + if self.order_by is not None: + if not isinstance(self.order_by, (str, collections.abc.Sequence)) or not all( + isinstance(r, str) for r in self.order_by + ): + raise TypeError("Top order_by attributes must all be strings") + if isinstance(self.order_by, str): + self.order_by = [self.order_by] if not isinstance(self.offset, int): raise TypeError("The offset argument must be an integer") if self.offset and self.limit is None: self.limit = 999999999999 # arbitrary large number to allow query - if isinstance(self.order_by, str): - self.order_by = [self.order_by] + + def merge(self, other: "Top") -> "Top": + """ + Merge another Top into this one (when other inherits ordering). + + Used when ``other.order_by`` is None or matches ``self.order_by``. + + Parameters + ---------- + other : Top + The Top to merge. Its order_by should be None or equal to self.order_by. + + Returns + ------- + Top + New Top with merged limit/offset and preserved ordering. 
+ """ + # Compute effective limit (minimum of defined limits) + if self.limit is None and other.limit is None: + new_limit = None + elif self.limit is None: + new_limit = other.limit + elif other.limit is None: + new_limit = self.limit + else: + new_limit = min(self.limit, other.limit) + + return Top( + limit=new_limit, + order_by=self.order_by, # preserve existing ordering + offset=self.offset + other.offset, # offsets add + ) class Not: diff --git a/src/datajoint/expression.py b/src/datajoint/expression.py index feb4a7d97..971a1ee5e 100644 --- a/src/datajoint/expression.py +++ b/src/datajoint/expression.py @@ -132,7 +132,9 @@ def where_clause(self): def sorting_clauses(self): if not self._top: return "" - clause = ", ".join(_wrap_attributes(_flatten_attribute_list(self.primary_key, self._top.order_by))) + # Default to KEY ordering if order_by is None (inherit with no existing order) + order_by = self._top.order_by if self._top.order_by is not None else ["KEY"] + clause = ", ".join(_wrap_attributes(_flatten_attribute_list(self.primary_key, order_by))) if clause: clause = f" ORDER BY {clause}" if self._top.limit is not None: @@ -216,10 +218,18 @@ def restrict(self, restriction, semantic_check=True): """ attributes = set() if isinstance(restriction, Top): - result = ( - self.make_subquery() if self._top and not self._top.__eq__(restriction) else copy.copy(self) - ) # make subquery to avoid overwriting existing Top - result._top = restriction + if self._top is None: + # No existing Top — apply new one directly + result = copy.copy(self) + result._top = restriction + elif restriction.order_by is None or restriction.order_by == self._top.order_by: + # Merge: new Top inherits or matches existing ordering + result = copy.copy(self) + result._top = self._top.merge(restriction) + else: + # Different ordering — need subquery + result = self.make_subquery() + result._top = restriction return result new_condition = make_condition(self, restriction, attributes, semantic_check=semantic_check) if new_condition is True: diff --git a/src/datajoint/jobs.py b/src/datajoint/jobs.py index 57533b8f3..7be80a0e5 100644 --- a/src/datajoint/jobs.py +++ b/src/datajoint/jobs.py @@ -348,8 +348,6 @@ def refresh( 3. Remove stale jobs: jobs older than stale_timeout whose keys not in key_source 4. Remove orphaned jobs: reserved jobs older than orphan_timeout (if specified) """ - from datetime import datetime, timedelta - from .settings import config # Ensure jobs table exists @@ -363,7 +361,6 @@ def refresh( stale_timeout = config.jobs.stale_timeout result = {"added": 0, "removed": 0, "orphaned": 0, "re_pended": 0} - now = datetime.now() # 1. Add new jobs key_source = self._target.key_source @@ -378,7 +375,9 @@ def refresh( new_key_list = new_keys.keys() if new_key_list: - scheduled_time = now + timedelta(seconds=delay) if delay > 0 else now + # Always use MySQL server time for scheduling (NOW(3) matches datetime(3) precision) + scheduled_time = self.connection.query(f"SELECT NOW(3) + INTERVAL {delay} SECOND").fetchone()[0] + for key in new_key_list: job_entry = { **key, @@ -403,10 +402,9 @@ def refresh( self.insert1({**key, "status": "pending", "priority": priority}) result["re_pended"] += 1 - # 3. Remove stale jobs (not ignore status) + # 3. 
Remove stale jobs (not ignore status) - use MySQL NOW() for consistent timing if stale_timeout > 0: - stale_cutoff = now - timedelta(seconds=stale_timeout) - old_jobs = self & f'created_time < "{stale_cutoff}"' & 'status != "ignore"' + old_jobs = self & f"created_time < NOW() - INTERVAL {stale_timeout} SECOND" & 'status != "ignore"' for key in old_jobs.keys(): # Check if key still in key_source @@ -414,10 +412,9 @@ def refresh( (self & key).delete_quick() result["removed"] += 1 - # 4. Handle orphaned reserved jobs + # 4. Handle orphaned reserved jobs - use MySQL NOW() for consistent timing if orphan_timeout is not None and orphan_timeout > 0: - orphan_cutoff = now - timedelta(seconds=orphan_timeout) - orphaned_jobs = self.reserved & f'reserved_time < "{orphan_cutoff}"' + orphaned_jobs = self.reserved & f"reserved_time < NOW() - INTERVAL {orphan_timeout} SECOND" for key in orphaned_jobs.keys(): (self & key).delete_quick() @@ -443,21 +440,21 @@ def reserve(self, key: dict) -> bool: bool True if reservation successful, False if job not available. """ - from datetime import datetime - - # Check if job is pending and scheduled - now = datetime.now() - job = (self & key & 'status="pending"' & f'scheduled_time <= "{now}"').to_dicts() + # Check if job is pending and scheduled (use NOW(3) to match CURRENT_TIMESTAMP(3) precision) + job = (self & key & 'status="pending"' & "scheduled_time <= NOW(3)").to_dicts() if not job: return False + # Get MySQL server time for reserved_time + server_now = self.connection.query("SELECT NOW()").fetchone()[0] + # Build update row with primary key and new values pk = self._get_pk(key) update_row = { **pk, "status": "reserved", - "reserved_time": now, + "reserved_time": server_now, "host": platform.node(), "pid": os.getpid(), "connection_id": self.connection.connection_id, @@ -489,16 +486,16 @@ def complete(self, key: dict, duration: float | None = None) -> None: - If True: updates status to ``'success'`` with completion time and duration - If False: deletes the job entry """ - from datetime import datetime - from .settings import config if config.jobs.keep_completed: + # Use MySQL server time for completed_time + server_now = self.connection.query("SELECT NOW()").fetchone()[0] pk = self._get_pk(key) update_row = { **pk, "status": "success", - "completed_time": datetime.now(), + "completed_time": server_now, } if duration is not None: update_row["duration"] = duration @@ -519,16 +516,17 @@ def error(self, key: dict, error_message: str, error_stack: str | None = None) - error_stack : str, optional Full stack trace. 
""" - from datetime import datetime - if len(error_message) > ERROR_MESSAGE_LENGTH: error_message = error_message[: ERROR_MESSAGE_LENGTH - len(TRUNCATION_APPENDIX)] + TRUNCATION_APPENDIX + # Use MySQL server time for completed_time + server_now = self.connection.query("SELECT NOW()").fetchone()[0] + pk = self._get_pk(key) update_row = { **pk, "status": "error", - "completed_time": datetime.now(), + "completed_time": server_now, "error_message": error_message, } if error_stack is not None: diff --git a/src/datajoint/schemas.py b/src/datajoint/schemas.py index 1f038321c..8f7acd19b 100644 --- a/src/datajoint/schemas.py +++ b/src/datajoint/schemas.py @@ -390,27 +390,27 @@ def spawn_missing_classes(self, context: dict[str, Any] | None = None) -> None: self._decorate_table(part_class, context=context, assert_declared=True) setattr(master_class, class_name, part_class) - def drop(self, force: bool = False) -> None: + def drop(self, prompt: bool | None = None) -> None: """ Drop the associated schema and all its tables. Parameters ---------- - force : bool, optional - If True, skip confirmation prompt. Default False. + prompt : bool, optional + If True, show confirmation prompt before dropping. + If False, drop without confirmation. + If None (default), use ``dj.config['safemode']`` setting. Raises ------ AccessError If insufficient permissions to drop the schema. """ + prompt = config["safemode"] if prompt is None else prompt + if not self.exists: logger.info("Schema named `{database}` does not exist. Doing nothing.".format(database=self.database)) - elif ( - not config["safemode"] - or force - or user_choice("Proceed to delete entire schema `%s`?" % self.database, default="no") == "yes" - ): + elif not prompt or user_choice("Proceed to delete entire schema `%s`?" % self.database, default="no") == "yes": logger.debug("Dropping `{database}`.".format(database=self.database)) try: self.connection.query("DROP DATABASE `{database}`".format(database=self.database)) diff --git a/src/datajoint/storage.py b/src/datajoint/storage.py index 0d401dbdf..6dacbd7ec 100644 --- a/src/datajoint/storage.py +++ b/src/datajoint/storage.py @@ -287,7 +287,7 @@ def fs(self) -> fsspec.AbstractFileSystem: def _create_filesystem(self) -> fsspec.AbstractFileSystem: """Create fsspec filesystem based on protocol.""" if self.protocol == "file": - return fsspec.filesystem("file") + return fsspec.filesystem("file", auto_mkdir=True) elif self.protocol == "s3": # Build S3 configuration diff --git a/src/datajoint/table.py b/src/datajoint/table.py index 17becc49c..7543c385e 100644 --- a/src/datajoint/table.py +++ b/src/datajoint/table.py @@ -813,8 +813,7 @@ def delete( self, transaction: bool = True, prompt: bool | None = None, - force_parts: bool = False, - force_masters: bool = False, + part_integrity: str = "enforce", ) -> int: """ Deletes the contents of the table and its dependent tables, recursively. @@ -825,9 +824,10 @@ def delete( nested within another transaction. prompt: If `True`, show what will be deleted and ask for confirmation. If `False`, delete without confirmation. Default is `dj.config['safemode']`. - force_parts: Delete from parts even when not deleting from their masters. - force_masters: If `True`, include part/master pairs in the cascade. - Default is `False`. + part_integrity: Policy for master-part integrity. One of: + - ``"enforce"`` (default): Error if parts would be deleted without masters. + - ``"ignore"``: Allow deleting parts without masters (breaks integrity). 
+ - ``"cascade"``: Also delete masters when parts are deleted (maintains integrity). Returns: Number of deleted rows (excluding those from dependent tables). @@ -835,8 +835,11 @@ def delete( Raises: DataJointError: Delete exceeds maximum number of delete attempts. DataJointError: When deleting within an existing transaction. - DataJointError: Deleting a part table before its master. + DataJointError: Deleting a part table before its master (when part_integrity="enforce"). + ValueError: Invalid part_integrity value. """ + if part_integrity not in ("enforce", "ignore", "cascade"): + raise ValueError(f"part_integrity must be 'enforce', 'ignore', or 'cascade', got {part_integrity!r}") deleted = set() visited_masters = set() @@ -892,7 +895,7 @@ def cascade(table): master_name = get_master(child.full_table_name) if ( - force_masters + part_integrity == "cascade" and master_name and master_name != table.full_table_name and master_name not in visited_masters @@ -941,15 +944,16 @@ def cascade(table): self.connection.cancel_transaction() raise - if not force_parts: - # Avoid deleting from child before master (See issue #151) + if part_integrity == "enforce": + # Avoid deleting from part before master (See issue #151) for part in deleted: master = get_master(part) if master and master not in deleted: if transaction: self.connection.cancel_transaction() raise DataJointError( - "Attempt to delete part table {part} before deleting from its master {master} first.".format( + "Attempt to delete part table {part} before deleting from its master {master} first. " + "Use part_integrity='ignore' to allow, or part_integrity='cascade' to also delete master.".format( part=part, master=master ) ) diff --git a/src/datajoint/user_tables.py b/src/datajoint/user_tables.py index dc860b52c..535276bbd 100644 --- a/src/datajoint/user_tables.py +++ b/src/datajoint/user_tables.py @@ -213,32 +213,50 @@ class Part(UserTable, metaclass=PartMeta): + ")" ) - def delete(self, force=False, **kwargs): + def delete(self, part_integrity: str = "enforce", **kwargs): """ Delete from a Part table. Args: - force: If True, allow direct deletion from Part table. - If False (default), raise an error. + part_integrity: Policy for master-part integrity. One of: + - ``"enforce"`` (default): Error - delete from master instead. + - ``"ignore"``: Allow direct deletion (breaks master-part integrity). + - ``"cascade"``: Delete parts AND cascade up to delete master. **kwargs: Additional arguments passed to Table.delete() - (transaction, prompt, force_masters) + (transaction, prompt) Raises: - DataJointError: If force is False (direct Part deletes are prohibited) + DataJointError: If part_integrity="enforce" (direct Part deletes prohibited) """ - if force: - super().delete(force_parts=True, **kwargs) - else: - raise DataJointError("Cannot delete from a Part directly. Delete from master instead") - - def drop(self, force=False): + if part_integrity == "enforce": + raise DataJointError( + "Cannot delete from a Part directly. Delete from master instead, " + "or use part_integrity='ignore' to break integrity, " + "or part_integrity='cascade' to also delete master." + ) + super().delete(part_integrity=part_integrity, **kwargs) + + def drop(self, part_integrity: str = "enforce"): """ - unless force is True, prohibits direct deletes from parts. + Drop a Part table. + + Args: + part_integrity: Policy for master-part integrity. One of: + - ``"enforce"`` (default): Error - drop master instead. + - ``"ignore"``: Allow direct drop (breaks master-part structure). 
+ Note: ``"cascade"`` is not supported for drop (too destructive). + + Raises: + DataJointError: If part_integrity="enforce" (direct Part drops prohibited) """ - if force: + if part_integrity == "ignore": super().drop() + elif part_integrity == "enforce": + raise DataJointError( + "Cannot drop a Part directly. Drop master instead, " "or use part_integrity='ignore' to force." + ) else: - raise DataJointError("Cannot drop a Part directly. Delete from master instead") + raise ValueError(f"part_integrity for drop must be 'enforce' or 'ignore', got {part_integrity!r}") def alter(self, prompt=True, context=None): # without context, use declaration context which maps master keyword to master table diff --git a/tests/integration/test_cascading_delete.py b/tests/integration/test_cascading_delete.py index 19d17dc08..28f175bea 100644 --- a/tests/integration/test_cascading_delete.py +++ b/tests/integration/test_cascading_delete.py @@ -38,7 +38,7 @@ def test_delete_tree(schema_simp_pop): def test_stepwise_delete(schema_simp_pop): assert not dj.config["safemode"], "safemode must be off for testing" assert L() and A() and B() and B.C(), "schema population failed" - B.C().delete(force=True) + B.C().delete(part_integrity="ignore") assert not B.C(), "failed to delete child tables" B().delete() assert not B(), "failed to delete from the parent table following child table deletion" @@ -113,19 +113,19 @@ def test_delete_parts_error(schema_simp_pop): """test issue #151""" with pytest.raises(dj.DataJointError): Profile().populate_random() - Website().delete(force_masters=False) + Website().delete(part_integrity="enforce") def test_delete_parts(schema_simp_pop): """test issue #151""" Profile().populate_random() - Website().delete(force_masters=True) + Website().delete(part_integrity="cascade") def test_delete_parts_complex(schema_simp_pop): """test issue #151 with complex master/part. 
PR #1158.""" prev_len = len(G()) - (A() & "id_a=1").delete(force_masters=True) + (A() & "id_a=1").delete(part_integrity="cascade") assert prev_len - len(G()) == 16, "Failed to delete parts" diff --git a/tests/integration/test_relational_operand.py b/tests/integration/test_relational_operand.py index 3b1586785..32fcc50d2 100644 --- a/tests/integration/test_relational_operand.py +++ b/tests/integration/test_relational_operand.py @@ -627,3 +627,49 @@ def test_top_errors(self, schema_simp_pop): assert "TypeError: Top limit must be an integer" == str(err3.exconly()) assert "TypeError: Top order_by attributes must all be strings" == str(err4.exconly()) assert "TypeError: The offset argument must be an integer" == str(err5.exconly()) + + def test_top_inherit_order(self, schema_simp_pop): + """Test that dj.Top(order_by=None) inherits existing ordering.""" + # First Top sets descending order, second Top inherits it + query = L() & dj.Top(10, "id_l desc") & dj.Top(3, order_by=None) + result = query.to_dicts() + assert len(result) == 3 + # Should be top 3 by descending id_l (L has ids 0-29) + assert result[0]["id_l"] > result[1]["id_l"] > result[2]["id_l"] + assert [r["id_l"] for r in result] == [29, 28, 27] + + def test_top_merge_identical_order(self, schema_simp_pop): + """Test that Tops with identical order_by are merged.""" + # Both Tops specify same ordering - should merge + query = L() & dj.Top(10, "id_l desc") & dj.Top(5, "id_l desc") + result = query.to_dicts() + # Merged limit is min(10, 5) = 5 + assert len(result) == 5 + assert [r["id_l"] for r in result] == [29, 28, 27, 26, 25] + + def test_top_merge_offsets_add(self, schema_simp_pop): + """Test that offsets are added when merging Tops.""" + # First Top: offset 2, second Top: offset 3, inherited order + query = L() & dj.Top(10, "id_l desc", offset=2) & dj.Top(3, order_by=None, offset=3) + result = query.to_dicts() + # Total offset = 2 + 3 = 5, so starts at 6th element (id_l=24) + assert len(result) == 3 + assert [r["id_l"] for r in result] == [24, 23, 22] + + def test_preview_respects_order(self, schema_simp_pop): + """Test that preview (to_arrays with limit) respects Top ordering (issue #1242).""" + # Apply descending order with no limit (None = unlimited) + query = L() & dj.Top(None, order_by="id_l desc") + # Preview should respect the ordering (single attr returns array directly) + id_l = query.to_arrays("id_l", limit=5) + assert list(id_l) == [29, 28, 27, 26, 25] + + def test_top_different_order_subquery(self, schema_simp_pop): + """Test that different orderings create subquery.""" + # First Top: descending, second Top: ascending - cannot merge + query = L() & dj.Top(10, "id_l desc") & dj.Top(3, "id_l asc") + result = query.to_dicts() + # Second Top reorders the result of first Top + # First Top gives ids 29-20, second Top takes lowest 3 of those + assert len(result) == 3 + assert [r["id_l"] for r in result] == [20, 21, 22] diff --git a/tests/schema_simple.py b/tests/schema_simple.py index cfa7fd9c4..f0e768d1f 100644 --- a/tests/schema_simple.py +++ b/tests/schema_simple.py @@ -141,9 +141,9 @@ class H(dj.Part): """ class M(dj.Part): - definition = """ # test force_masters revisit + definition = """ # test part_integrity cascade -> E - id_m :int + id_m : uint16 --- -> E.H """ diff --git a/tests/unit/test_condition.py b/tests/unit/test_condition.py new file mode 100644 index 000000000..3200e34c4 --- /dev/null +++ b/tests/unit/test_condition.py @@ -0,0 +1,95 @@ +"""Unit tests for condition.py - Top class and merge logic.""" + +import 
pytest +from datajoint.condition import Top + + +class TestTopMerge: + """Tests for Top.merge() method.""" + + def test_merge_inherits_order(self): + """When other.order_by is None, ordering is inherited.""" + top1 = Top(limit=10, order_by="score desc") + top2 = Top(limit=5, order_by=None) + merged = top1.merge(top2) + assert merged.order_by == ["score desc"] + assert merged.limit == 5 + assert merged.offset == 0 + + def test_merge_limits_take_min(self): + """Merged limit is minimum of both.""" + top1 = Top(limit=10, order_by="id") + top2 = Top(limit=3, order_by=None) + merged = top1.merge(top2) + assert merged.limit == 3 + + # Reverse order + top1 = Top(limit=3, order_by="id") + top2 = Top(limit=10, order_by=None) + merged = top1.merge(top2) + assert merged.limit == 3 + + def test_merge_none_limit_preserved(self): + """None limit (unlimited) is handled correctly.""" + top1 = Top(limit=None, order_by="id") + top2 = Top(limit=5, order_by=None) + merged = top1.merge(top2) + assert merged.limit == 5 + + top1 = Top(limit=5, order_by="id") + top2 = Top(limit=None, order_by=None) + merged = top1.merge(top2) + assert merged.limit == 5 + + top1 = Top(limit=None, order_by="id") + top2 = Top(limit=None, order_by=None) + merged = top1.merge(top2) + assert merged.limit is None + + def test_merge_offsets_add(self): + """Offsets are added together.""" + top1 = Top(limit=10, order_by="id", offset=5) + top2 = Top(limit=3, order_by=None, offset=2) + merged = top1.merge(top2) + assert merged.offset == 7 + + def test_merge_preserves_existing_order(self): + """Merged Top preserves first Top's ordering.""" + top1 = Top(limit=10, order_by=["col1 desc", "col2 asc"]) + top2 = Top(limit=5, order_by=None) + merged = top1.merge(top2) + assert merged.order_by == ["col1 desc", "col2 asc"] + + +class TestTopValidation: + """Tests for Top validation.""" + + def test_order_by_none_allowed(self): + """order_by=None is valid (means inherit).""" + top = Top(limit=5, order_by=None) + assert top.order_by is None + + def test_order_by_string_converted_to_list(self): + """Single string order_by is converted to list.""" + top = Top(order_by="id desc") + assert top.order_by == ["id desc"] + + def test_order_by_list_preserved(self): + """List order_by is preserved.""" + top = Top(order_by=["col1", "col2 desc"]) + assert top.order_by == ["col1", "col2 desc"] + + def test_invalid_limit_type_raises(self): + """Non-integer limit raises TypeError.""" + with pytest.raises(TypeError): + Top(limit="5") + + def test_invalid_order_by_type_raises(self): + """Non-string order_by raises TypeError.""" + with pytest.raises(TypeError): + Top(order_by=123) + + def test_invalid_offset_type_raises(self): + """Non-integer offset raises TypeError.""" + with pytest.raises(TypeError): + Top(offset="1")
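
Taken together, the `Expression.restrict` and `Top.merge` changes above define how stacked `dj.Top` restrictions compose: limits take the minimum, offsets add, and a `Top` with `order_by=None` inherits the existing ordering. The sketch below exercises only the pure-Python `Top` dataclass from this patch (no database or containers needed); it assumes the `merge` signature shown in the diff, and the attribute name `score` is illustrative, not part of any real schema.

```python
# Minimal sketch of the Top composition rule introduced in this patch.
from datajoint.condition import Top

outer = Top(limit=10, order_by="score DESC", offset=2)
inner = Top(limit=3, order_by=None, offset=3)  # order_by=None inherits outer's ordering

merged = outer.merge(inner)
assert merged.limit == 3                   # limits take the minimum: min(10, 3)
assert merged.offset == 5                  # offsets add: 2 + 3
assert merged.order_by == ["score DESC"]   # ordering preserved from the outer Top
```

This mirrors the path in `restrict()` where the incoming `Top` either inherits or matches the existing ordering; only a genuinely different `order_by` forces a subquery.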