diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 237c9ed0..97c8c97f 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,5 +1,4 @@ { - "$schema": "https://raw.githubusercontent.com/devcontainers/spec/main/schemas/devContainer.schema.json", "name": "nfcore", "image": "nfcore/devcontainer:latest", diff --git a/.devcontainer/setup.sh b/.devcontainer/setup.sh index b16c2e73..033278f7 100755 --- a/.devcontainer/setup.sh +++ b/.devcontainer/setup.sh @@ -10,4 +10,4 @@ export PS1='\[\e[3;36m\]\w ->\[\e[0m\\] ' nextflow self-update # Update welcome message -echo "Welcome to the nf-core/spatialxe devcontainer!" > /usr/local/etc/vscode-dev-containers/first-run-notice.txt +echo "Welcome to the nf-core/spatialaxe devcontainer!" > /usr/local/etc/vscode-dev-containers/first-run-notice.txt diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md new file mode 100644 index 00000000..21358536 --- /dev/null +++ b/.github/CONTRIBUTING.md @@ -0,0 +1,125 @@ +# `nf-core/spatialaxe`: Contributing Guidelines + +Hi there! +Many thanks for taking an interest in improving nf-core/spatialaxe. + +We try to manage the required tasks for nf-core/spatialaxe using GitHub issues, you probably came to this page when creating one. +Please use the pre-filled template to save time. + +However, don't be put off by this template - other more general issues and suggestions are welcome! +Contributions to the code are even more welcome ;) + +> [!NOTE] +> If you need help using or modifying nf-core/spatialaxe then the best place to ask is on the nf-core Slack [#spatialaxe](https://nfcore.slack.com/channels/spatialaxe) channel ([join our Slack here](https://nf-co.re/join/slack)). + +## Contribution workflow + +If you'd like to write some code for nf-core/spatialaxe, the standard workflow is as follows: + +1. 
Check that there isn't already an issue about your idea in the [nf-core/spatialaxe issues](https://github.com/nf-core/spatialaxe/issues) to avoid duplicating work. If there isn't one already, please create one so that others know you're working on this +2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [nf-core/spatialaxe repository](https://github.com/nf-core/spatialaxe) to your GitHub account +3. Make the necessary changes / additions within your forked repository following [Pipeline conventions](#pipeline-contribution-conventions) +4. Use `nf-core pipelines schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10). +5. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged + +If you're not used to this workflow with git, you can start with some [docs from GitHub](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests) or even their [excellent `git` resources](https://try.github.io/). + +## Tests + +You have the option to test your changes locally by running the pipeline. For receiving warnings about process selectors and other `debug` information, it is recommended to use the debug profile. Execute all the tests with the following command: + +```bash +nf-test test --profile debug,test,docker --verbose +``` + +When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. +Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. + +There are typically two types of tests that run: + +### Lint tests + +`nf-core` has a [set of guidelines](https://nf-co.re/developers/guidelines) which all pipelines must adhere to. +To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. 
This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core pipelines lint <pipeline-directory>` command. + +If any failures or warnings are encountered, please follow the listed URL for more documentation. + +### Pipeline tests + +Each `nf-core` pipeline should be set up with a minimal set of test-data. +`GitHub Actions` then runs the pipeline on this data to ensure that it exits successfully. +If there are any failures then the automated tests fail. +These tests are run both with the latest available version of `Nextflow` and also the minimum required version that is stated in the pipeline code. + +## Patch + +:warning: Only in the unlikely and regrettable event of a release happening with a bug. + +- On your own fork, make a new branch `patch` based on `upstream/main` or `upstream/master`. +- Fix the bug, and bump version (X.Y.Z+1). +- Open a pull-request from `patch` to `main`/`master` with the changes. + +## Getting help + +For further information/help, please consult the [nf-core/spatialaxe documentation](https://nf-co.re/spatialaxe/usage) and don't hesitate to get in touch on the nf-core Slack [#spatialaxe](https://nfcore.slack.com/channels/spatialaxe) channel ([join our Slack here](https://nf-co.re/join/slack)). + +## Pipeline contribution conventions + +To make the `nf-core/spatialaxe` code and processing logic more understandable for new contributors and to ensure quality, we semi-standardise the way the code and other contributions are written. + +### Adding a new step + +If you wish to contribute a new step, please use the following coding standards: + +1. Define the corresponding input channel into your new process from the expected previous process channel. +2. Write the process block (see below). +3. Define the output channel if needed (see below). +4. Add any new parameters to `nextflow.config` with a default (see below). +5. 
Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core pipelines schema build` tool). +6. Add sanity checks and validation for all relevant parameters. +7. Perform local tests to validate that the new code works as expected. +8. If applicable, add a new test in the `tests` directory. +9. Update MultiQC config `assets/multiqc_config.yml` so relevant suffixes, file name clean up and module plots are in the appropriate order. If applicable, add a [MultiQC](https://multiqc.info/) module. +10. Add a description of the output files and if relevant any appropriate images from the MultiQC report to `docs/output.md`. + +### Default values + +Parameters should be initialised / defined with default values within the `params` scope in `nextflow.config`. + +Once there, use `nf-core pipelines schema build` to add to `nextflow_schema.json`. + +### Default processes resource requirements + +Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generically with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. An nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/main/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels. + +The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block. + +### Naming schemes + +Please use the following naming schemes, to make it easy to understand what is going where. 
+ +- initial process channel: `ch_output_from_<process>` +- intermediate and terminal channels: `ch_<previousprocess>_for_<nextprocess>` + +### Nextflow version bumping + +If you are using a new feature from core Nextflow, you may bump the minimum required version of nextflow in the pipeline with: `nf-core pipelines bump-version --nextflow . [min-nf-version]` + +### Images and figures + +For overview images and other documents we follow the nf-core [style guidelines and examples](https://nf-co.re/developers/design_guidelines). + +## GitHub Codespaces + +This repo includes a devcontainer configuration which will create a GitHub Codespaces for Nextflow development! This is an online developer environment that runs in your browser, complete with VSCode and a terminal. + +To get started: + +- Open the repo in [Codespaces](https://github.com/nf-core/spatialaxe/codespaces) +- Tools installed + - nf-core + - Nextflow + +Devcontainer specs: + +- [DevContainer config](.devcontainer/devcontainer.json) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 08eca313..b08b451d 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -8,7 +8,7 @@ body: Before you post this issue, please check the documentation: - [nf-core website: troubleshooting](https://nf-co.re/usage/troubleshooting) - - [nf-core/spatialxe pipeline documentation](https://nf-co.re/spatialxe/usage) + - [nf-core/spatialaxe pipeline documentation](https://nf-co.re/spatialaxe/usage) - type: textarea id: description attributes: @@ -46,4 +46,4 @@ body: * Executor _(eg. slurm, local, awsbatch)_ * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ * OS _(eg. CentOS Linux, macOS, Linux Mint)_ - * Version of nf-core/spatialxe _(eg. 1.1, 1.5, 1.8.2)_ + * Version of nf-core/spatialaxe _(eg. 
1.1, 1.5, 1.8.2)_ diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 6192b81d..881458ad 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -2,6 +2,6 @@ contact_links: - name: Join nf-core url: https://nf-co.re/join about: Please join the nf-core community here - - name: "Slack #spatialxe channel" - url: https://nfcore.slack.com/channels/spatialxe - about: Discussion about the nf-core/spatialxe pipeline + - name: "Slack #spatialaxe channel" + url: https://nfcore.slack.com/channels/spatialaxe + about: Discussion about the nf-core/spatialaxe pipeline diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml index 9276baef..23ffd0c9 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.yml +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -1,5 +1,5 @@ name: Feature request -description: Suggest an idea for the nf-core/spatialxe pipeline +description: Suggest an idea for the nf-core/spatialaxe pipeline labels: enhancement body: - type: textarea diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 6a378da4..7d1f9dfd 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,22 +1,22 @@ ## PR checklist - [ ] This comment contains a description of changes (with reason). - [ ] If you've fixed a bug or added code that should be tested, add tests! -- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/spatialxe/tree/master/docs/CONTRIBUTING.md) -- [ ] If necessary, also make a PR on the nf-core/spatialxe _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. 
+- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/spatialaxe/tree/main/.github/CONTRIBUTING.md) +- [ ] If necessary, also make a PR on the nf-core/spatialaxe _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core pipelines lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). - [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). diff --git a/.github/actions/get-shards/action.yml b/.github/actions/get-shards/action.yml index e2833ee9..34085279 100644 --- a/.github/actions/get-shards/action.yml +++ b/.github/actions/get-shards/action.yml @@ -21,7 +21,7 @@ runs: using: "composite" steps: - name: Install nf-test - uses: nf-core/setup-nf-test@4069fbbaabe94c08faba4ad261bfa88225ba133f # v2 + uses: nf-core/setup-nf-test@v1 with: version: ${{ env.NFT_VER }} - name: Get number of shards diff --git a/.github/actions/nf-test/action.yml b/.github/actions/nf-test/action.yml index ad686e8e..3b9724c7 100644 --- a/.github/actions/nf-test/action.yml +++ b/.github/actions/nf-test/action.yml @@ -20,24 +20,24 @@ runs: using: "composite" steps: - name: Setup Nextflow - uses: nf-core/setup-nextflow@b4ec1bc7c16a94435159de94a05253542fddf6ef # v3 + uses: nf-core/setup-nextflow@v2 with: version: "${{ env.NXF_VERSION }}" - name: Set up Python - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 + uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 with: python-version: "3.14" - name: Install nf-test - uses: nf-core/setup-nf-test@4069fbbaabe94c08faba4ad261bfa88225ba133f # v2 + uses: nf-core/setup-nf-test@v1 with: version: "${{ env.NFT_VER }}" install-pdiff: true - name: Setup apptainer if: contains(inputs.profile, 'singularity') - uses: 
eWaterCycle/setup-apptainer@3f706d898c9db585b1d741b4692e66755f3a1b40 # v2 + uses: eWaterCycle/setup-apptainer@main - name: Set up Singularity if: contains(inputs.profile, 'singularity') @@ -48,7 +48,7 @@ runs: - name: Conda setup if: contains(inputs.profile, 'conda') - uses: conda-incubator/setup-miniconda@8ee1f361103df19b6f8c8655fd3967a8ecb162d5 # v4 + uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3 with: auto-update-conda: true conda-solver: libmamba diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 5eb53aee..874c73e8 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -14,7 +14,7 @@ jobs: run-platform: name: Run AWS full tests # run only if the PR is approved by at least 2 reviewers and against the master/main branch or manually triggered - if: github.repository == 'nf-core/spatialxe' && github.event.review.state == 'approved' && (github.event.pull_request.base.ref == 'master' || github.event.pull_request.base.ref == 'main') || github.event_name == 'workflow_dispatch' || github.event_name == 'release' + if: github.repository == 'nf-core/spatialaxe' && github.event.review.state == 'approved' && (github.event.pull_request.base.ref == 'master' || github.event.pull_request.base.ref == 'main') || github.event_name == 'workflow_dispatch' || github.event_name == 'release' runs-on: ubuntu-latest steps: - name: Set revision variable @@ -23,8 +23,7 @@ jobs: echo "revision=${{ (github.event_name == 'workflow_dispatch' || github.event_name == 'release') && github.sha || 'dev' }}" >> "$GITHUB_OUTPUT" - name: Launch workflow via Seqera Platform - uses: seqeralabs/action-tower-launch@51565b514bff1827cf34620de25d0055759f1fc9 # v2 - # TODO nf-core: You can customise AWS full pipeline tests as required + uses: seqeralabs/action-tower-launch@v2 # Add full size test data (but still relatively small datasets for few samples) # on the `test_full.config` test runs with only 
one set of parameters with: @@ -32,34 +31,15 @@ jobs: access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ vars.TOWER_COMPUTE_ENV }} revision: ${{ steps.revision.outputs.revision }} - workdir: s3://${{ vars.AWS_S3_BUCKET }}/work/spatialxe/work-${{ steps.revision.outputs.revision }} - nextflow_config: | - plugins { - id 'nf-slack@0.5.0' - } - slack { - enabled = true - bot { - token = '${{ secrets.NFSLACK_BOT_TOKEN }}' - channel = 'spatialxe' - } - onStart { - enabled = false - } - onComplete { - message = ':white_check_mark: *spatialxe/test_full* completed successfully! :tada:' - } - onError { - message = ':x: *spatialxe/test_full* failed :crying_cat_face:' - } - } + workdir: s3://${{ vars.AWS_S3_BUCKET }}/work/spatialaxe/work-${{ steps.revision.outputs.revision }} parameters: | { - "outdir": "s3://${{ vars.AWS_S3_BUCKET }}/spatialxe/results-${{ steps.revision.outputs.revision }}" + "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", + "outdir": "s3://${{ vars.AWS_S3_BUCKET }}/spatialaxe/results-${{ steps.revision.outputs.revision }}" } profiles: test_full - - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7 + - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5 with: name: Seqera Platform debug log file path: | diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 632a6a5e..8a8d7f8e 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -7,25 +7,25 @@ on: jobs: run-platform: name: Run AWS tests - if: github.repository == 'nf-core/spatialxe' + if: github.repository == 'nf-core/spatialaxe' runs-on: ubuntu-latest steps: # Launch workflow using Seqera Platform CLI tool action - name: Launch workflow via Seqera Platform - uses: seqeralabs/action-tower-launch@51565b514bff1827cf34620de25d0055759f1fc9 # v2 + uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ vars.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} 
compute_env: ${{ vars.TOWER_COMPUTE_ENV }} revision: ${{ github.sha }} - workdir: s3://${{ vars.AWS_S3_BUCKET }}/work/spatialxe/work-${{ github.sha }} + workdir: s3://${{ vars.AWS_S3_BUCKET }}/work/spatialaxe/work-${{ github.sha }} parameters: | { - "outdir": "s3://${{ vars.AWS_S3_BUCKET }}/spatialxe/results-test-${{ github.sha }}" + "outdir": "s3://${{ vars.AWS_S3_BUCKET }}/spatialaxe/results-test-${{ github.sha }}" } profiles: test - - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7 + - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5 with: name: Seqera Platform debug log file path: | diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index 873c6700..1d9a4442 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -13,15 +13,15 @@ jobs: steps: # PRs to the nf-core repo main/master branch are only ok if coming from the nf-core repo `dev` or any `patch` branches - name: Check PRs - if: github.repository == 'nf-core/spatialxe' + if: github.repository == 'nf-core/spatialaxe' run: | - { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/spatialxe ]] && [[ $GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] + { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/spatialaxe ]] && [[ $GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] # If the above check failed, post a comment on the PR explaining the failure # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets - name: Post PR comment if: failure() - uses: mshick/add-pr-comment@8e4927817251f1ff60c001f04568532b38e0b4a0 # v3 + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 with: message: | ## This PR is against the `${{github.event.pull_request.base.ref}}` branch :x: diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml index 172de6f3..6adb0fff 100644 
--- a/.github/workflows/clean-up.yml +++ b/.github/workflows/clean-up.yml @@ -10,7 +10,7 @@ jobs: issues: write pull-requests: write steps: - - uses: actions/stale@b5d41d4e1d5dceea10e7104786b73624c18a190f # v10 + - uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # v10 with: stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml index a7bf4fc2..45884ff9 100644 --- a/.github/workflows/download_pipeline.yml +++ b/.github/workflows/download_pipeline.yml @@ -38,16 +38,13 @@ jobs: runs-on: ubuntu-latest needs: configure steps: - - name: Check out pipeline code - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 - - name: Install Nextflow - uses: nf-core/setup-nextflow@b4ec1bc7c16a94435159de94a05253542fddf6ef # v3 + uses: nf-core/setup-nextflow@v2 - name: Disk space cleanup uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 + - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 with: python-version: "3.14" architecture: "x64" @@ -57,15 +54,10 @@ jobs: with: apptainer-version: 1.3.4 - - name: Read .nf-core.yml - id: read_yml - run: | - echo "nf_core_version=$(yq '.nf_core_version' ${{ github.workspace }}/.nf-core.yml)" >> "$GITHUB_OUTPUT" - - name: Install dependencies run: | python -m pip install --upgrade pip - pip install nf-core==${{ steps.read_yml.outputs['nf_core_version'] }} + pip install git+https://github.com/nf-core/tools.git - name: Make a cache directory for the container images run: | @@ -135,7 
+127,7 @@ jobs: fi - name: Upload Nextflow logfile for debugging purposes - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7 + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5 with: name: nextflow_logfile.txt path: .nextflow.log* diff --git a/.github/workflows/fix_linting.yml b/.github/workflows/fix_linting.yml index e9c90ca8..5f731f04 100644 --- a/.github/workflows/fix_linting.yml +++ b/.github/workflows/fix_linting.yml @@ -9,11 +9,11 @@ jobs: if: > contains(github.event.comment.html_url, '/pull/') && contains(github.event.comment.body, '@nf-core-bot fix linting') && - github.repository == 'nf-core/spatialxe' + github.repository == 'nf-core/spatialaxe' runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 with: token: ${{ secrets.nf_core_bot_auth_token }} @@ -31,18 +31,22 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - name: Install Nextflow - uses: nf-core/setup-nextflow@b4ec1bc7c16a94435159de94a05253542fddf6ef # v3 + # Install and run pre-commit + - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 + with: + python-version: "3.14" + + - name: Install pre-commit + run: pip install pre-commit - # Install and run prek - - name: Run prek - id: prek - uses: j178/prek-action@6ad80277337ad479fe43bd70701c3f7f8aa74db3 # v2 + - name: Run pre-commit + id: pre-commit + run: pre-commit run --all-files continue-on-error: true # indication that the linting has finished - name: react if linting finished succesfully - if: steps.prek.outcome == 'success' + if: steps.pre-commit.outcome == 'success' uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5 with: comment-id: ${{ github.event.comment.id }} @@ -50,7 +54,7 @@ jobs: - name: Commit & push changes id: 
commit-and-push - if: steps.prek.outcome == 'failure' + if: steps.pre-commit.outcome == 'failure' run: | git config user.email "core@nf-co.re" git config user.name "nf-core-bot" @@ -82,4 +86,4 @@ jobs: issue-number: ${{ github.event.issue.number }} body: | @${{ github.actor }} I tried to fix the linting errors, but it didn't work. Please fix them manually. - See [CI log](https://github.com/nf-core/spatialxe/actions/runs/${{ github.run_id }}) for more details. + See [CI log](https://github.com/nf-core/spatialaxe/actions/runs/${{ github.run_id }}) for more details. diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 8738ffc9..7a527a34 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -11,31 +11,33 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 - - name: Install Nextflow - uses: nf-core/setup-nextflow@b4ec1bc7c16a94435159de94a05253542fddf6ef # v3 + - name: Set up Python 3.14 + uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 + with: + python-version: "3.14" - - name: Run prek - uses: j178/prek-action@6ad80277337ad479fe43bd70701c3f7f8aa74db3 # v2 + - name: Install pre-commit + run: pip install pre-commit + + - name: Run pre-commit + run: pre-commit run --all-files nf-core: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 - name: Install Nextflow - uses: nf-core/setup-nextflow@b4ec1bc7c16a94435159de94a05253542fddf6ef # v3 + uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 + - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 with: python-version: "3.14" architecture: "x64" - - name: Setup uv - uses: 
astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 - - name: read .nf-core.yml uses: pietrobolcato/action-read-yaml@9f13718d61111b69f30ab4ac683e67a56d254e1d # 1.1.0 id: read_yml @@ -43,10 +45,12 @@ jobs: config: ${{ github.workspace }}/.nf-core.yml - name: Install dependencies - run: uv tool install nf-core==${{ steps.read_yml.outputs['nf_core_version'] }} + run: | + python -m pip install --upgrade pip + pip install nf-core==${{ steps.read_yml.outputs['nf_core_version'] }} - name: Run nf-core pipelines lint - if: ${{ github.base_ref != 'master' || github.base_ref != 'main' }} + if: ${{ github.base_ref != 'master' }} env: GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -54,7 +58,7 @@ jobs: run: nf-core -l lint_log.txt pipelines lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md - name: Run nf-core pipelines lint --release - if: ${{ github.base_ref == 'master' || github.base_ref == 'main' }} + if: ${{ github.base_ref == 'master' }} env: GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -67,7 +71,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7 + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 5b0c24f7..e6e9bc26 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@b6e2e70617bc3265edd6dab6c906732b2f1ae151 # v21 + uses: dawidd6/action-download-artifact@ac66b43f0e6a346234dd65d4d0c8fbb31cb316e5 # v11 with: workflow: linting.yml workflow_conclusion: completed @@ -21,7 +21,7 @@ jobs: run: echo 
"pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment - uses: marocchino/sticky-pull-request-comment@70d2764d1a7d5d9560b100cbea0077fc8f633987 # v3 + uses: marocchino/sticky-pull-request-comment@773744901bac0e8cbb5a0dc842800d45e9b2b405 # v2 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml index efd72d65..410a24c3 100644 --- a/.github/workflows/nf-test.yml +++ b/.github/workflows/nf-test.yml @@ -18,7 +18,7 @@ concurrency: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - NFT_VER: "0.9.4" + NFT_VER: "0.9.5" NFT_WORKDIR: "~" NXF_ANSI_LOG: false NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity @@ -40,7 +40,7 @@ jobs: rm -rf ./* || true rm -rf ./.??* || true ls -la ./ - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 with: fetch-depth: 0 @@ -50,7 +50,7 @@ jobs: env: NFT_VER: ${{ env.NFT_VER }} with: - max_shards: 7 + max_shards: 12 - name: debug run: | @@ -64,6 +64,7 @@ jobs: runs-on: # use self-hosted runners - runs-on=${{ github.run_id }}-nf-test - runner=4cpu-linux-x64 + - volume=80gb strategy: fail-fast: false matrix: @@ -71,21 +72,23 @@ jobs: profile: [conda, docker, singularity] isMain: - ${{ github.base_ref == 'master' || github.base_ref == 'main' }} - # Exclude conda and singularity on dev + # Exclude conda and singularity on dev; conda disabled on all branches exclude: - isMain: false profile: "conda" - isMain: false profile: "singularity" + - isMain: true + profile: "conda" NXF_VER: - - "25.10.4" + - "25.04.0" - "latest-everything" env: NXF_ANSI_LOG: false TOTAL_SHARDS: ${{ needs.nf-test-changes.outputs.total_shards }} steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 with: fetch-depth: 0 diff --git 
a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml index 78d5dbe0..431d3d44 100644 --- a/.github/workflows/release-announcements.yml +++ b/.github/workflows/release-announcements.yml @@ -18,7 +18,7 @@ jobs: id: get_description run: | echo "description=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .description')" >> $GITHUB_OUTPUT - - uses: rzr/fediverse-action@563159eb8d45f70ab6aaba36ed55cd037e51f441 # master + - uses: rzr/fediverse-action@master with: access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} host: "mstdn.science" # custom host if not "mastodon.social" (default) @@ -34,7 +34,7 @@ jobs: bsky-post: runs-on: ubuntu-latest steps: - - uses: zentered/bluesky-post-action@5a91cc2ad10a304a4e96c16182dbe4918710bcf6 # v0.4.0 + - uses: zentered/bluesky-post-action@6461056ea355ea43b977e149f7bf76aaa572e5e8 # v0.3.0 with: post: | Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! 
diff --git a/.github/workflows/template-version-comment.yml b/.github/workflows/template-version-comment.yml index ea30827e..e8560fc7 100644 --- a/.github/workflows/template-version-comment.yml +++ b/.github/workflows/template-version-comment.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 with: ref: ${{ github.event.pull_request.head.sha }} @@ -29,7 +29,7 @@ jobs: run: echo "OUTPUT=$(pip list --outdated | grep nf-core)" >> ${GITHUB_ENV} - name: Post nf-core template version comment - uses: mshick/add-pr-comment@8e4927817251f1ff60c001f04568532b38e0b4a0 # v3 + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 if: | contains(env.OUTPUT, 'nf-core') with: @@ -42,5 +42,5 @@ jobs: > Your pipeline is using an old version of the nf-core template: ${{ steps.read_yml.outputs['nf_core_version'] }}. > Please update your pipeline to the latest version. > - > For more documentation on how to update your pipeline, please see the [Synchronisation documentation](https://nf-co.re/docs/developing/template-syncs/overview). + > For more documentation on how to update your pipeline, please see the [nf-core documentation](https://github.com/nf-core/tools?tab=readme-ov-file#sync-a-pipeline-with-the-template) and [Synchronisation documentation](https://nf-co.re/docs/contributing/sync). 
# diff --git a/.gitignore b/.gitignore index cc2b1a77..2ef7dde1 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,6 @@ testing/ testing* *.pyc null/ -.lineage/ +.nf-test/ +.nf-test.log +.nf-test-* diff --git a/.nf-core.yml b/.nf-core.yml index ae563bd5..1e213b56 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,21 +1,24 @@ lint: + actions_ci: false files_exist: - .github/workflows/awsfulltest.yml - .github/workflows/awstest.yml files_unchanged: - .gitignore - - assets/nf-core-spatialxe_logo_light.png - - docs/images/nf-core-spatialxe_logo_dark.png - - docs/images/nf-core-spatialxe_logo_light.png + - assets/nf-core-spatialaxe_logo_light.png + - docs/images/nf-core-spatialaxe_logo_dark.png + - docs/images/nf-core-spatialaxe_logo_light.png - .github/PULL_REQUEST_TEMPLATE.md -nf_core_version: 4.0.2 +nf_core_version: 3.5.2 repository_type: pipeline template: - author: Sameesh Kher, Florian Heyl + author: Sameesh Kher, Dongze He, Florian Heyl description: A pipeline for spatialomics Xenium In Situ data. force: false is_nfcore: true - name: spatialxe + name: spatialaxe org: nf-core outdir: . 
- version: 1.0.0 + skip_features: + - igenomes + version: 1.1.0dev diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f51e1a28..d06777a8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,7 +4,7 @@ repos: hooks: - id: prettier additional_dependencies: - - prettier@3.8.3 + - prettier@3.6.2 - repo: https://github.com/pre-commit/pre-commit-hooks rev: v6.0.0 hooks: @@ -13,21 +13,15 @@ repos: exclude: | (?x)^( .*ro-crate-metadata.json$| - modules/(?!local/).*| - subworkflows/(?!local/).*| + modules/nf-core/.*| + subworkflows/nf-core/.*| .*\.snap$ )$ - id: end-of-file-fixer exclude: | (?x)^( .*ro-crate-metadata.json$| - modules/(?!local/).*| - subworkflows/(?!local/).*| + modules/nf-core/.*| + subworkflows/nf-core/.*| .*\.snap$ )$ - - repo: https://github.com/seqeralabs/nf-lint-pre-commit - rev: v0.3.0 - hooks: - - id: nextflow-lint - files: '\.nf$|nextflow\.config$' - args: ["-output", "json"] diff --git a/.prettierignore b/.prettierignore index 63cde500..2255e3e3 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,4 +1,6 @@ email_template.html +adaptivecard.json +slackreport.json .nextflow* work/ data/ @@ -10,5 +12,3 @@ testing* bin/ .nf-test/ ro-crate-metadata.json -modules/nf-core/ -subworkflows/nf-core/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 9f0d1258..afee0d0e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,11 +1,23 @@ -# nf-core/spatialxe: Changelog +# nf-core/spatialaxe: Changelog The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v1.0.0 - [date] +## 1.0.0dev - [date] -Initial release of nf-core/spatialxe, created with the [nf-core](https://nf-co.re/) template. +Initial release of nf-core/spatialaxe, created with the [nf-core](https://nf-co.re/) template. 
+ +### `Added` + +### `Fixed` + +### `Dependencies` + +### `Deprecated` + +## 1.0.0 - [18.06.2026] + +Initial release of nf-core/spatialaxe, created with the [nf-core](https://nf-co.re/) template. ### `Added` diff --git a/CITATIONS.md b/CITATIONS.md index e8c14658..1457e968 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -1,4 +1,4 @@ -# nf-core/spatialxe: Citations +# nf-core/spatialaxe: Citations ## [nf-core](https://pubmed.ncbi.nlm.nih.gov/32055031/) @@ -10,10 +10,6 @@ ## Pipeline tools -- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) - -> Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. - - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. diff --git a/LICENSE b/LICENSE index df908d32..cf21b558 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) The nf-core/spatialxe team +Copyright (c) The nf-core/spatialaxe team Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index f3f2cbde..3ea92e69 100644 --- a/README.md +++ b/README.md @@ -1,101 +1,187 @@

- - nf-core/spatialxe + + nf-core/spatialaxe

-[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/spatialxe) -[![GitHub Actions CI Status](https://github.com/nf-core/spatialxe/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/spatialxe/actions/workflows/nf-test.yml) -[![GitHub Actions Linting Status](https://github.com/nf-core/spatialxe/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/spatialxe/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/spatialxe/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://github.com/codespaces/new/nf-core/spatialaxe) +[![GitHub Actions CI Status](https://github.com/nf-core/spatialaxe/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/spatialaxe/actions/workflows/nf-test.yml) +[![GitHub Actions Linting Status](https://github.com/nf-core/spatialaxe/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/spatialaxe/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/spatialaxe/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) -[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.4-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/) -[![nf-core template 
version](https://img.shields.io/badge/nf--core_template-4.0.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/4.0.2) -[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) +[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/) +[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.4.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.4.1) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) -[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/spatialxe) +[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/spatialaxe) -[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23spatialxe-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/spatialxe)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) 
+[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23spatialaxe-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/spatialaxe)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) ## Introduction -**nf-core/spatialxe** is a bioinformatics pipeline that ... +**nf-core/spatialaxe** is a bioinformatics best-practice processing and quality control pipeline for Xenium (and soon Atera) data. The current plan for the pipeline implementation is shown in the metromap below. **The pipeline is under active development and changes might occur frequently**. - +![nf-core/spatialaxe-metromap](docs/images/spatialaxe-metromap.png) - -1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) +> [!NOTE] +> We are currently extending the pipeline for the [10x Atera system](https://www.10xgenomics.com/platforms/atera). 
+ +## Tools supported + +The pipeline supports the following tools: + +- Segmentation methods: + - [Baysor](https://doi.org/10.1038/s41587-021-01044-w) + - [Cellpose](https://doi.org/10.1038/s41592-020-01018-x) + - [Xenium ranger (XR)](https://www.10xgenomics.com/support/software/xenium-ranger/latest) + - [StarDist](https://doi.org/10.48550/arXiv.2203.02284) +- Segmentation free methods: + - [Ficture](https://doi.org/10.1038/s41592-024-02415-2) + - [Baysor](https://doi.org/10.1038/s41587-021-01044-w) +- Transcript assignment methods: + - [Segger](https://doi.org/10.1101/2025.03.14.643160) + - [Proseg](https://doi.org/10.1038/s41592-025-02697-0) +- Utility methods: + - [SpatialData](https://doi.org/10.1038/s41592-024-02212-x) + - [Baysor](https://doi.org/10.1038/s41587-021-01044-w) +- QC methods: + - [MultiQC Xenium Extra Plugin](https://github.com/MultiQC/xenium-extra) + - [OPT](https://github.com/JEFworks-Lab/off-target-probe-tracker) ## Usage -> [!NOTE] -> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/get_started/environment_setup/overview) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/get_started/run-your-first-pipeline) with `-profile test` before running the workflow on actual data. +On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/spatialaxe/results). - +### Run image-based segmentation mode
-Now, you can run the pipeline using: +`CELLPOSE -> BAYSOR -> XR-IMPORT_SEGMENTATION -> SPATIALDATA -> QC` - +```bash +nextflow run nf-core/spatialaxe \ + -profile \ + --input samplesheet.csv \ + --outdir \ + --mode +``` + +### Run coordinate-based segmentation mode
+ +`PROSEG -> PROSEG2BAYSOR -> XR-IMPORT_SEGMENTATION -> SPATIALDATA -> QC` ```bash -nextflow run nf-core/spatialxe \ +nextflow run nf-core/spatialaxe \ -profile \ --input samplesheet.csv \ - --outdir + --outdir \ + --mode coordinate ``` +### Run segfree mode
+ +`BAYSOR_SEGFREE` + +```bash +nextflow run nf-core/spatialaxe \ + -profile \ + --input samplesheet.csv \ + --outdir \ + --mode segfree +``` + +### Run preview mode
+ +`BAYSOR_PREVIEW` + +```bash +nextflow run nf-core/spatialaxe \ + -profile \ + --input samplesheet.csv \ + --outdir \ + --mode preview +``` + +### Run just the quality control
+ +```bash +nextflow run nf-core/spatialaxe \ + -profile \ + --input samplesheet.csv \ + --outdir \ + --mode qc +``` + +### Additional information + > [!WARNING] -> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/running/run-pipelines#using-parameter-files). +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files). -For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/spatialxe/usage) and the [parameter documentation](https://nf-co.re/spatialxe/parameters). +For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/spatialaxe/usage) and the [parameter documentation](https://nf-co.re/spatialaxe/parameters). ## Pipeline output -To see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/spatialxe/results) tab on the nf-core website pipeline page. +To see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/spatialaxe/results) tab on the nf-core website pipeline page. For more details about the output files and reports, please refer to the -[output documentation](https://nf-co.re/spatialxe/output). +[output documentation](https://nf-co.re/spatialaxe/output). 
+ +## Runtime and resource estimations + +| Tool | Compute | Runtime (min / med / max) | Peak RSS (min / med / max) | +| ------------------------- | ------- | ------------------------- | -------------------------- | +| Cellpose | GPU | 1m / 4m / 1.4h | 10 GB / 26 GB / 554 GB | +| Cellpose | CPU | 1.3h / 2.3h / 6.5h | 161 GB / 426 GB / 1115 GB | +| StarDist | GPU | 1m / 4m / 7m | 5 GB / 12 GB / 18 GB | +| StarDist | CPU | 5m / 6m / 7m | 18 GB / 18 GB / 18 GB | +| Segger (create_dataset) | GPU | 2m / 9m / 31m | 1.7 GB / 14 GB / 50 GB | +| Segger (create_dataset) | CPU | 13m / 21m / 46m | 13 GB / 19 GB / 49 GB | +| Segger (train) | GPU | 10m / 43m / 2.9h | 30 GB / 33 GB / 60 GB | +| Segger (predict) | GPU | 2m / 16m / 59m | 10 GB / 25 GB / 87 GB | +| Baysor (whole-image) | CPU | 2m / 30m / 17h | 6 GB / 10 GB / 650 GB | +| Baysor (tiled) | CPU | 1m / 18m / 13h | 0.2 GB / 34 GB / 530 GB | +| Proseg | CPU | 1m / 18m / 6.8h | 279 MB / 3.8 GB / 136 GB | +| XeniumRanger (resegment) | CPU | 18m / 39m / 3.7h | 28 GB / 54 GB / 60 GB | +| XeniumRanger (import_seg) | CPU | 2m / 7m / 2.7h | 2.6 GB / 11 GB / 51 GB | +| Ficture (preprocess) | CPU | 3m / 4m / 13m | 331 MB / 357 MB / 21 GB | + +- Cellpose GPU vs CPU: 35x faster on GPU (4m median vs 2.3h), 16x less memory (26 GB vs 426 GB) +- Segger: Only tool that truly requires GPU for all 3 steps (create_dataset, train, predict) +- StarDist: Very fast on CPU, GPU is not necessary to run its default model ## Credits -nf-core/spatialxe was originally written by Sameesh Kher, Florian Heyl. +nf-core/spatialaxe is mainly developed by [Sameesh Kher](https://github.com/khersameesh24), [Dongze He](https://github.com/dongzehe), and [Florian Heyl](https://github.com/heylf). 
We thank the following people for their extensive assistance in the development of this pipeline: - +- Tobias Krause +- Krešimir Beštak (kbestak) +- Matthias Hörtenhuber (mashehu) +- Maxime Garcia (maxulysse) +- Kübra Narcı (kubranarci) ## Contributions and Support -If you would like to contribute to this pipeline, please see the [contributing guidelines](docs/CONTRIBUTING.md). +If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md). -For further information or help, don't hesitate to get in touch on the [Slack `#spatialxe` channel](https://nfcore.slack.com/channels/spatialxe) (you can join with [this invite](https://nf-co.re/join/slack)). +For further information or help, don't hesitate to get in touch on the [Slack `#spatialaxe` channel](https://nfcore.slack.com/channels/spatialaxe) (you can join with [this invite](https://nf-co.re/join/slack)). ## Citations - - - - + An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. 
diff --git a/assets/adaptivecard.json b/assets/adaptivecard.json new file mode 100644 index 00000000..1fbf3f15 --- /dev/null +++ b/assets/adaptivecard.json @@ -0,0 +1,67 @@ +{ + "type": "message", + "attachments": [ + { + "contentType": "application/vnd.microsoft.card.adaptive", + "contentUrl": null, + "content": { + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "msteams": { + "width": "Full" + }, + "type": "AdaptiveCard", + "version": "1.2", + "body": [ + { + "type": "TextBlock", + "size": "Large", + "weight": "Bolder", + "color": "<% if (success) { %>Good<% } else { %>Attention<%} %>", + "text": "nf-core/spatialaxe v${version} - ${runName}", + "wrap": true + }, + { + "type": "TextBlock", + "spacing": "None", + "text": "Completed at ${dateComplete} (duration: ${duration})", + "isSubtle": true, + "wrap": true + }, + { + "type": "TextBlock", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors. The full error message was: ${errorReport}.<% } %>", + "wrap": true + }, + { + "type": "TextBlock", + "text": "The command used to launch the workflow was as follows:", + "wrap": true + }, + { + "type": "TextBlock", + "text": "${commandLine}", + "isSubtle": true, + "wrap": true + } + ], + "actions": [ + { + "type": "Action.ShowCard", + "title": "Pipeline Configuration", + "card": { + "type": "AdaptiveCard", + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "body": [ + { + "type": "FactSet", + "facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"}.join(",\n") %> + ] + } + ] + } + } + ] + } + } + ] +} diff --git a/assets/config/xenium.toml b/assets/config/xenium.toml new file mode 100644 index 00000000..c7740146 --- /dev/null +++ b/assets/config/xenium.toml @@ -0,0 +1,15 @@ +[data] +x = "x_location" +y = "y_location" +z = "z_location" +gene = "feature_name" +min_molecules_per_gene = 10 +exclude_genes = "NegControl*,BLANK_*,antisense_*" 
+min_molecules_per_cell = 50 + +[segmentation] +unassigned_prior_label = "UNASSIGNED" +prior_segmentation_confidence = 0.5 + +[plotting] +min_pixels_per_cell = 10 diff --git a/assets/email_template.html b/assets/email_template.html index 819a2f90..636a526a 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -4,21 +4,21 @@ - - nf-core/spatialxe Pipeline Report + + nf-core/spatialaxe Pipeline Report
-

nf-core/spatialxe ${version}

+

nf-core/spatialaxe ${version}

Run Name: $runName

<% if (!success){ out << """
-

nf-core/spatialxe execution completed unsuccessfully!

+

nf-core/spatialaxe execution completed unsuccessfully!

The exit status of the task that caused the workflow execution to fail was: $exitStatus.

The full error message was:

${errorReport}
@@ -27,7 +27,7 @@

nf-core/spatialxe execution completed } else { out << """
- nf-core/spatialxe execution completed successfully! + nf-core/spatialaxe execution completed successfully!
""" } @@ -44,8 +44,8 @@

Pipeline Configuration:

-

nf-core/spatialxe

-

https://github.com/nf-core/spatialxe

+

nf-core/spatialaxe

+

https://github.com/nf-core/spatialaxe

diff --git a/assets/email_template.txt b/assets/email_template.txt index f92d5849..9447a493 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -4,15 +4,15 @@ |\\ | |__ __ / ` / \\ |__) |__ } { | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, `._,._,' - nf-core/spatialxe ${version} + nf-core/spatialaxe ${version} ---------------------------------------------------- Run Name: $runName <% if (success){ - out << "## nf-core/spatialxe execution completed successfully! ##" + out << "## nf-core/spatialaxe execution completed successfully! ##" } else { out << """#################################################### -## nf-core/spatialxe execution completed unsuccessfully! ## +## nf-core/spatialaxe execution completed unsuccessfully! ## #################################################### The exit status of the task that caused the workflow execution to fail was: $exitStatus. The full error message was: @@ -35,5 +35,5 @@ Pipeline Configuration: <% out << summary.collect{ k,v -> " - $k: $v" }.join("\n") %> -- -nf-core/spatialxe -https://github.com/nf-core/spatialxe +nf-core/spatialaxe +https://github.com/nf-core/spatialaxe diff --git a/assets/example_samplesheet.csv b/assets/example_samplesheet.csv new file mode 100644 index 00000000..9cc36cf4 --- /dev/null +++ b/assets/example_samplesheet.csv @@ -0,0 +1,2 @@ +sample,bundle,image +xenium_prime_mouse_ileum,/home/user/raw_data/xenium/Xenium_Prime_Mouse_Ileum_tiny_outs,/home/user/raw_data/xenium/Xenium_Prime_Mouse_Ileum_tiny_outs/morphology.ome.tif diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml index 1ac4277b..abfb98ea 100644 --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -1,13 +1,12 @@ -id: "nf-core-spatialxe-methods-description" +id: "nf-core-spatialaxe-methods-description" description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication." 
-section_name: "nf-core/spatialxe Methods Description" -section_href: "https://github.com/nf-core/spatialxe" +section_name: "nf-core/spatialaxe Methods Description" +section_href: "https://github.com/nf-core/spatialaxe" plot_type: "html" -## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline ## You inject any metadata in the Nextflow '${workflow}' object data: |

Methods

-

Data was processed using nf-core/spatialxe v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.

+

Data was processed using nf-core/spatialaxe v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.

The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:

${workflow.commandLine}

${tool_citations}

diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index a11d3d7f..47566390 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,15 +1,43 @@ report_comment: > - This report has been generated by the nf-core/spatialxe - analysis pipeline. For information about how to interpret these results, please see the - documentation. + This report has been generated by the nf-core/spatialaxe analysis pipeline. For information about how to interpret these results, please see the documentation. report_section_order: - "nf-core-spatialxe-methods-description": + "nf-core-spatialaxe-methods-description": order: -1000 software_versions: order: -1001 - "nf-core-spatialxe-summary": + "nf-core-spatialaxe-summary": order: -1002 export_plots: true disable_version_detection: true + +run_module: + - xenium + +module_order: + - xenium + +sp: + cell_feature_matrix: + fn: cell_feature_matrix.h5 + cells: + fn: cells.parquet + experiment: + fn: experiment.xenium + num_lines: 50 + metrics: + contents: num_cells_detected + fn: metrics_summary.csv + num_lines: 5 + transcripts: + fn: transcripts.parquet + +custom_data: + focus_density_plot: + pconfig: + title: "Focus Score: Per Sequence Density" + xlab: "CCFS Focus Score" + ylab: "Density" + ymin: 0 + logswitch: false diff --git a/assets/nf-core-spatialaxe_logo_light.png b/assets/nf-core-spatialaxe_logo_light.png new file mode 100644 index 00000000..c7a25c98 Binary files /dev/null and b/assets/nf-core-spatialaxe_logo_light.png differ diff --git a/assets/nf-core-spatialxe_logo_light.png b/assets/nf-core-spatialxe_logo_light.png deleted file mode 100644 index 8a8ca75b..00000000 Binary files a/assets/nf-core-spatialxe_logo_light.png and /dev/null differ diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 5f653ab7..72ad34f0 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,2 @@ -sample,fastq_1,fastq_2 
-SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz -SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz, +sample,bundle,image +test_run,https://raw.githubusercontent.com/nf-core/test-datasets/spatialaxe/xenium_bundle.tar.gz, diff --git a/assets/schema_input.json b/assets/schema_input.json index 7435ad64..c6cda688 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -1,7 +1,7 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://raw.githubusercontent.com/nf-core/spatialxe/master/assets/schema_input.json", - "title": "nf-core/spatialxe pipeline - params.input schema", + "$id": "https://raw.githubusercontent.com/nf-core/spatialaxe/master/assets/schema_input.json", + "title": "nf-core/spatialaxe pipeline - params.input schema", "description": "Schema for the file provided with params.input", "type": "array", "items": { @@ -13,21 +13,17 @@ "errorMessage": "Sample name must be provided and cannot contain spaces", "meta": ["id"] }, - "fastq_1": { + "bundle": { "type": "string", - "format": "file-path", - "exists": true, - "pattern": "^([\\S\\s]*\\/)?[^\\s\\/]+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "pattern": "^\\S+$", + "errorMessage": "Please provide a bundle as input data" }, - "fastq_2": { + "image": { "type": "string", - "format": "file-path", - "exists": true, - "pattern": "^([\\S\\s]*\\/)?[^\\s\\/]+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "pattern": "^\\S+$", + "errorMessage": "You can provide an image. If you do not then please leave the field empty." 
} }, - "required": ["sample", "fastq_1"] + "required": ["sample", "bundle"] } } diff --git a/assets/sendmail_template.txt b/assets/sendmail_template.txt index cd864042..dd49eb59 100644 --- a/assets/sendmail_template.txt +++ b/assets/sendmail_template.txt @@ -9,12 +9,12 @@ Content-Type: text/html; charset=utf-8 $email_html --nfcoremimeboundary -Content-Type: image/png;name="nf-core-spatialxe_logo.png" +Content-Type: image/png;name="nf-core-spatialaxe_logo.png" Content-Transfer-Encoding: base64 Content-ID: -Content-Disposition: inline; filename="nf-core-spatialxe_logo_light.png" +Content-Disposition: inline; filename="nf-core-spatialaxe_logo_light.png" -<% out << new File("$projectDir/assets/nf-core-spatialxe_logo_light.png"). +<% out << new File("$projectDir/assets/nf-core-spatialaxe_logo_light.png"). bytes. encodeBase64(). toString(). diff --git a/assets/slackreport.json b/assets/slackreport.json new file mode 100644 index 00000000..a2991f65 --- /dev/null +++ b/assets/slackreport.json @@ -0,0 +1,34 @@ +{ + "attachments": [ + { + "fallback": "Plain-text summary of the attachment.", + "color": "<% if (success) { %>good<% } else { %>danger<%} %>", + "author_name": "nf-core/spatialaxe ${version} - ${runName}", + "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", + "fields": [ + { + "title": "Command used to launch the workflow", + "value": "```${commandLine}```", + "short": false + } + <% + if (!success) { %> + , + { + "title": "Full error message", + "value": "```${errorReport}```", + "short": false + }, + { + "title": "Pipeline configuration", + "value": "<% out << summary.collect{ k,v -> k == "hook_url" ? "_${k}_: (_hidden_)" : ( ( v.class.toString().contains('Path') || ( v.class.toString().contains('String') && v.contains('/') ) ) ? "_${k}_: `${v}`" : (v.class.toString().contains('DateTime') ? 
("_${k}_: " + v.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM))) : "_${k}_: ${v}") ) }.join(",\n") %>", + "short": false + } + <% } + %> + ], + "footer": "Completed at <% out << dateComplete.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM)) %> (duration: ${duration})" + } + ] +} diff --git a/bin/baysor_create_dataset.py b/bin/baysor_create_dataset.py new file mode 100755 index 00000000..4e5a263a --- /dev/null +++ b/bin/baysor_create_dataset.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 +""" +Create a sampled dataset for Baysor preview mode. + +Reads a CSV transcript file and randomly samples a fraction of rows, +writing the result to a new CSV file. +""" + +import argparse +import csv +import os +import random +from pathlib import Path + + +class BaysorPreview(): + """ + Utility class to generate baysor preview dataset + """ + @staticmethod + def generate_dataset( + transcripts: Path, + sampled_transcripts: Path, + sample_fraction: float = 0.3, + random_state: int = 42, + prefix: str = "" + ) -> None: + """ + Reads a csv file & randomly samples a fraction of rows, + and writes the result to a .csv file. 
+ + Args: + transcripts: unziped transcripts.csv from xenium bundle + sampled_transcripts: randomly subsampled transcripts.csv file + sample_fraction: Fraction of rows to sample + random_state: Seed for reproducibility + prefix: Output directory prefix + """ + + random.seed(random_state) + output_path = f"{prefix}/{sampled_transcripts}" + os.makedirs(os.path.dirname(output_path), exist_ok=True) + with open(transcripts, mode='rt', newline='') as infile, \ + open(output_path, mode='wt', newline='') as outfile: + + reader = csv.reader(infile) + writer = csv.writer(outfile) + + # get the header line + header = next(reader) + writer.writerow(header) + + # randomize csv rows to write + for row in reader: + if random.random() < float(sample_fraction): + writer.writerow(row) + + return None + + +def main() -> None: + """ + Run create dataset as nf module + """ + parser = argparse.ArgumentParser( + description="Create sampled dataset for Baysor preview" + ) + parser.add_argument( + "--transcripts", required=True, + help="Path to transcripts CSV file" + ) + parser.add_argument( + "--sample-fraction", required=True, type=float, + help="Fraction of rows to sample" + ) + parser.add_argument( + "--prefix", required=True, + help="Output directory prefix" + ) + args = parser.parse_args() + + sampled_transcripts = "sampled_transcripts.csv" + + # generate dataset + BaysorPreview.generate_dataset( + transcripts=args.transcripts, + sampled_transcripts=sampled_transcripts, + sample_fraction=args.sample_fraction, + prefix=args.prefix + ) + + return None + + +if __name__ == "__main__": + main() diff --git a/bin/baysor_preprocess_transcripts.py b/bin/baysor_preprocess_transcripts.py new file mode 100755 index 00000000..2662f83c --- /dev/null +++ b/bin/baysor_preprocess_transcripts.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python3 +""" +Preprocess Xenium transcripts for Baysor segmentation. 
def filter_transcripts(
    transcripts: str,
    min_qv: float = 20.0,
    min_x: float = 0.0,
    max_x: float = 24000.0,
    min_y: float = 0.0,
    max_y: float = 24000.0,
    prefix: str = "",
) -> None:
    """
    Filter Xenium transcripts and write them as CSV for Baysor.

    A row is kept when its ``qv`` is at least ``min_qv``, its coordinates lie
    inside the closed box ``[min_x, max_x] x [min_y, max_y]`` and its
    ``feature_name`` does not start with one of the control prefixes listed
    below. Rows whose ``feature_name`` is missing are kept (a missing name is
    not a control probe). The result is written to
    ``<prefix>/filtered_transcripts.csv``.

    Args:
        transcripts: Path to transcripts parquet file
        min_qv: Minimum Q-Score to pass filtering
        min_x: Minimum x-coordinate threshold
        max_x: Maximum x-coordinate threshold
        min_y: Minimum y-coordinate threshold
        max_y: Maximum y-coordinate threshold
        prefix: Output directory prefix; an empty string means the current
            working directory
    """
    # Feature-name prefixes of negative controls / blanks that are dropped.
    control_prefixes = (
        "NegControlProbe_",
        "antisense_",
        "NegControlCodeword_",
        "BLANK_",
    )

    df = pd.read_parquet(transcripts, engine="pyarrow")

    # na=False keeps the mask strictly boolean: without it a missing
    # feature_name yields NaN and the negation below raises TypeError.
    feature_names = df["feature_name"]
    is_control = pd.Series(False, index=df.index)
    for control_prefix in control_prefixes:
        is_control |= feature_names.str.startswith(control_prefix, na=False)

    keep = (
        (df["qv"] >= min_qv)
        & (df["x_location"] >= min_x)
        & (df["x_location"] <= max_x)
        & (df["y_location"] >= min_y)
        & (df["y_location"] <= max_y)
        & ~is_control
    )

    # Explicit copy: the cell_id rewrite below must modify an independent
    # frame, not a view of `df` (avoids chained-assignment warnings).
    filtered_df = df.loc[keep].copy()

    # change cell_id of cell-free transcripts to "0" (Baysor's no-cell sentinel).
    # Modern Xenium stores cell_id as a string ("UNASSIGNED" for cell-free
    # transcripts); legacy Xenium used integer -1. Normalize to string and
    # handle both cases — pandas 3 rejects mixing int values into a
    # string-dtype column.
    filtered_df["cell_id"] = filtered_df["cell_id"].astype(str)
    cell_free = filtered_df["cell_id"].isin(["-1", "UNASSIGNED"])
    filtered_df.loc[cell_free, "cell_id"] = "0"

    # Output filtered transcripts as CSV for Baysor 0.7.1 compatibility.
    # Baysor's Julia Parquet.jl cannot read modern pyarrow Parquet files
    # (pyarrow 15+ writes size_statistics Thrift field 16 unconditionally,
    # which Baysor's old Thrift deserializer doesn't recognize).
    # os.makedirs("") raises FileNotFoundError, so the default empty prefix
    # is mapped to the current directory.
    out_dir = prefix or "."
    os.makedirs(out_dir, exist_ok=True)
    filtered_df.to_csv(os.path.join(out_dir, "filtered_transcripts.csv"), index=False)

    return None
@dataclass(frozen=True)
class Bounds:
    """Axis-aligned bounding box in either pixel or micron coordinates."""

    # Lower / upper limits along X.
    x_min: float
    x_max: float
    # Lower / upper limits along Y.
    y_min: float
    y_max: float

    @property
    def width(self) -> float:
        """Extent along X (``x_max - x_min``)."""
        return self.x_max - self.x_min

    @property
    def height(self) -> float:
        """Extent along Y (``y_max - y_min``)."""
        return self.y_max - self.y_min


@dataclass(frozen=True)
class PatchInfo:
    """Metadata for a single patch in the grid."""

    patch_id: str
    row: int
    col: int
    # Full patch extent, including the overlap margin.
    global_bounds_px: Bounds
    global_bounds_um: Bounds
    # Patch extent with the overlap trimmed on sides that have a neighbor.
    core_bounds_px: Bounds
    core_bounds_um: Bounds


# ---------------------------------------------------------------------------
# Grid computation — uniform
# ---------------------------------------------------------------------------


def _compute_uniform_grid(
    image_height_px: int,
    image_width_px: int,
    grid_rows: int,
    grid_cols: int,
    overlap_px: int,
    pixel_size_um: float,
) -> list[PatchInfo]:
    """
    Compute a regular NxM grid of overlapping patches.

    Grid is computed in pixel space. Each patch overlaps its neighbors by
    overlap_px pixels. Core regions are computed such that every pixel
    belongs to exactly one core.

    Args:
        image_height_px: Image height in pixels.
        image_width_px: Image width in pixels.
        grid_rows: Number of rows in the patch grid.
        grid_cols: Number of columns in the patch grid.
        overlap_px: Overlap between adjacent patches in pixels.
        pixel_size_um: Microns per pixel.

    Returns:
        List of PatchInfo for every patch, in row-major order.

    Raises:
        ValueError: If grid_rows or grid_cols is below 1, or overlap_px is
            negative.
    """
    if grid_rows < 1 or grid_cols < 1:
        raise ValueError(
            f"grid_rows and grid_cols must be >= 1, got {grid_rows} x {grid_cols}"
        )
    if overlap_px < 0:
        raise ValueError(f"overlap_px must be >= 0, got {overlap_px}")

    step_x = (image_width_px - overlap_px) / grid_cols
    step_y = (image_height_px - overlap_px) / grid_rows

    # Round every patch origin exactly once. A patch's far edge is then the
    # NEXT origin plus the overlap. Rounding `origin + step + overlap`
    # separately is not equivalent: round() resolves .5 ties to the nearest
    # even integer, so adding an odd overlap before rounding can move the edge
    # by one pixel and leave a gap between neighboring cores.
    x_origins = [int(round(col * step_x)) for col in range(grid_cols + 1)]
    y_origins = [int(round(row * step_y)) for row in range(grid_rows + 1)]

    # Core bounds: trim half-overlap from sides that have neighbors. The odd
    # pixel of an odd overlap is trimmed from the leading (min) side.
    half_overlap = overlap_px // 2
    remainder = overlap_px % 2
    leading_trim = half_overlap + remainder
    trailing_trim = half_overlap

    def _to_microns(bounds_px: Bounds) -> Bounds:
        # Scale a pixel-space box into microns.
        return Bounds(
            bounds_px.x_min * pixel_size_um,
            bounds_px.x_max * pixel_size_um,
            bounds_px.y_min * pixel_size_um,
            bounds_px.y_max * pixel_size_um,
        )

    patches: list[PatchInfo] = []
    for row in range(grid_rows):
        y_min_px = y_origins[row]
        y_max_px = min(y_origins[row + 1] + overlap_px, image_height_px)
        core_y_min = y_min_px + (leading_trim if row > 0 else 0)
        core_y_max = y_max_px - (trailing_trim if row < grid_rows - 1 else 0)

        for col in range(grid_cols):
            x_min_px = x_origins[col]
            x_max_px = min(x_origins[col + 1] + overlap_px, image_width_px)
            core_x_min = x_min_px + (leading_trim if col > 0 else 0)
            core_x_max = x_max_px - (trailing_trim if col < grid_cols - 1 else 0)

            global_bounds_px = Bounds(x_min_px, x_max_px, y_min_px, y_max_px)
            core_bounds_px = Bounds(core_x_min, core_x_max, core_y_min, core_y_max)

            patches.append(
                PatchInfo(
                    patch_id=f"patch_{row}_{col}",
                    row=row,
                    col=col,
                    global_bounds_px=global_bounds_px,
                    global_bounds_um=_to_microns(global_bounds_px),
                    core_bounds_px=core_bounds_px,
                    core_bounds_um=_to_microns(core_bounds_px),
                )
            )

    return patches


def compute_tilewidth_uniform_grid(
    image_height_px: int,
    image_width_px: int,
    tile_width_um: float,
    overlap_um: float,
    pixel_size_um: float,
    transcript_extent_um: Bounds,
) -> tuple[list[PatchInfo], int, int, int]:
    """
    Compute a uniform grid from a tile width in microns.

    The number of rows and columns is the image extent in microns divided by
    tile_width_um, rounded up (at least 1 each). The overlap is converted to
    whole pixels, rounding up.

    Args:
        image_height_px: Image height in pixels.
        image_width_px: Image width in pixels.
        tile_width_um: Desired tile width in microns.
        overlap_um: Overlap between adjacent patches in microns.
        pixel_size_um: Size of one pixel in microns.
        transcript_extent_um: Bounding box of transcript coordinates.
            Accepted for interface compatibility; not used by this function.

    Returns:
        Tuple of (patches, grid_rows, grid_cols, overlap_px).

    Raises:
        ValueError: If tile_width_um or pixel_size_um is not positive.
    """
    if tile_width_um <= 0:
        raise ValueError(f"tile_width_um must be > 0, got {tile_width_um}")
    if pixel_size_um <= 0:
        raise ValueError(f"pixel_size_um must be > 0, got {pixel_size_um}")

    image_width_um = image_width_px * pixel_size_um
    image_height_um = image_height_px * pixel_size_um
    cols = max(1, math.ceil(image_width_um / tile_width_um))
    rows = max(1, math.ceil(image_height_um / tile_width_um))
    overlap_px = int(math.ceil(overlap_um / pixel_size_um))

    patches = _compute_uniform_grid(
        image_height_px, image_width_px, rows, cols, overlap_px, pixel_size_um
    )
    return patches, rows, cols, overlap_px
def _count_transcripts_in_rect(
    prefix_sum: np.ndarray,
    x_edges: np.ndarray,
    y_edges: np.ndarray,
    x_min_um: float,
    x_max_um: float,
    y_min_um: float,
    y_max_um: float,
) -> int:
    """
    Count transcripts in a rectangle using a 2D prefix sum array.

    The rectangle is treated as half-open, ``[min, max)`` on both axes, the
    same convention ``_count_transcripts_per_tile`` uses. The result is the
    total of every histogram bin that the rectangle touches, so it is exact
    only when the rectangle's sides coincide with bin edges.

    Args:
        prefix_sum: 2D cumulative sum array (n_bins_y x n_bins_x).
        x_edges: Histogram bin edges along X.
        y_edges: Histogram bin edges along Y.
        x_min_um: Left bound in microns.
        x_max_um: Right bound in microns (exclusive).
        y_min_um: Top bound in microns.
        y_max_um: Bottom bound in microns (exclusive).

    Returns:
        Approximate transcript count in the rectangle (never negative).
    """
    n_rows, n_cols = prefix_sum.shape

    def _touched_bins(
        edges: np.ndarray, lo: float, hi: float, n_bins: int
    ) -> tuple[int, int]:
        # First bin: the one containing `lo` (clamped to bin 0 when `lo` lies
        # left of the histogram).
        first = max(0, int(np.searchsorted(edges, lo, side="right")) - 1)
        # Last bin: the highest bin that starts strictly below `hi`.
        # side="left" is what makes the upper bound exclusive; with
        # side="right" a rectangle ending exactly on a bin edge would also
        # pick up the whole bin that starts at that edge.
        last = min(n_bins - 1, int(np.searchsorted(edges, hi, side="left")) - 1)
        return first, last

    col_lo, col_hi = _touched_bins(x_edges, x_min_um, x_max_um, n_cols)
    row_lo, row_hi = _touched_bins(y_edges, y_min_um, y_max_um, n_rows)

    # Rectangle lies entirely outside the histogram (or is empty).
    if col_lo > col_hi or row_lo > row_hi:
        return 0

    # Inclusion-exclusion on the prefix sum: whole block up to (row_hi, col_hi),
    # minus the strip above, minus the strip to the left, plus the corner that
    # was subtracted twice.
    above = prefix_sum[row_lo - 1, col_hi] if row_lo > 0 else 0
    left = prefix_sum[row_hi, col_lo - 1] if col_lo > 0 else 0
    corner = prefix_sum[row_lo - 1, col_lo - 1] if row_lo > 0 and col_lo > 0 else 0

    total = int(prefix_sum[row_hi, col_hi] - above - left + corner)
    return max(0, total)
+ """ + result: list[tuple[float, float, float, float]] = [] + stack: list[tuple[tuple[float, float, float, float], int]] = [ + (r, 0) for r in regions + ] + + while stack: + region, depth = stack.pop() + x_min, x_max, y_min, y_max = region + width = x_max - x_min + height = y_max - y_min + + count = _count_transcripts_in_rect( + prefix_sum, x_edges, y_edges, x_min, x_max, y_min, y_max + ) + + if count <= max_transcripts or depth >= max_depth: + result.append(region) + continue + + if min(width, height) / 2 < min_tile_width_um: + result.append(region) + continue + + # Split into 4 quadrants + mid_x = (x_min + x_max) / 2 + mid_y = (y_min + y_max) / 2 + children = [ + (x_min, mid_x, y_min, mid_y), + (mid_x, x_max, y_min, mid_y), + (x_min, mid_x, mid_y, y_max), + (mid_x, x_max, mid_y, y_max), + ] + for child in children: + stack.append((child, depth + 1)) + + return result + + +def _regions_to_patches( + regions: list[tuple[float, float, float, float]], + overlap_um: float, + overlap_px: int, + pixel_size_um: float, + image_width_px: int, + image_height_px: int, +) -> list[PatchInfo]: + """ + Convert quadtree regions to PatchInfo objects with overlap. + + Args: + regions: Sorted list of (x_min, x_max, y_min, y_max) in microns. + overlap_um: Overlap in microns. + overlap_px: Overlap in pixels. + pixel_size_um: Microns per pixel. + image_width_px: Image width in pixels. + image_height_px: Image height in pixels. + + Returns: + List of PatchInfo objects. 
+ """ + patches: list[PatchInfo] = [] + for i, (x_min_um, x_max_um, y_min_um, y_max_um) in enumerate(regions): + # Core bounds in pixels + core_x_min_px = max( + 0, min(int(round(x_min_um / pixel_size_um)), image_width_px) + ) + core_x_max_px = max( + 0, min(int(round(x_max_um / pixel_size_um)), image_width_px) + ) + core_y_min_px = max( + 0, min(int(round(y_min_um / pixel_size_um)), image_height_px) + ) + core_y_max_px = max( + 0, min(int(round(y_max_um / pixel_size_um)), image_height_px) + ) + + core_bounds_px = Bounds( + core_x_min_px, core_x_max_px, core_y_min_px, core_y_max_px + ) + + # Global bounds: core extended by overlap, clamped to image + global_x_min_px = max(0, core_x_min_px - overlap_px) + global_x_max_px = min(image_width_px, core_x_max_px + overlap_px) + global_y_min_px = max(0, core_y_min_px - overlap_px) + global_y_max_px = min(image_height_px, core_y_max_px + overlap_px) + + global_bounds_px = Bounds( + global_x_min_px, global_x_max_px, global_y_min_px, global_y_max_px + ) + + core_bounds_um = Bounds( + core_x_min_px * pixel_size_um, + core_x_max_px * pixel_size_um, + core_y_min_px * pixel_size_um, + core_y_max_px * pixel_size_um, + ) + global_bounds_um = Bounds( + global_x_min_px * pixel_size_um, + global_x_max_px * pixel_size_um, + global_y_min_px * pixel_size_um, + global_y_max_px * pixel_size_um, + ) + + patches.append( + PatchInfo( + patch_id=f"patch_{i}", + row=i, + col=0, + global_bounds_px=global_bounds_px, + global_bounds_um=global_bounds_um, + core_bounds_px=core_bounds_px, + core_bounds_um=core_bounds_um, + ) + ) + + return patches + + +def compute_density_quadtree_grid( + image_height_px: int, + image_width_px: int, + tile_width_um: float, + overlap_um: float, + pixel_size_um: float, + x_coords_um: np.ndarray, + y_coords_um: np.ndarray, + max_transcripts_per_patch: int | None = None, + min_tile_width_um: float = QUADTREE_MIN_TILE_WIDTH_UM, + max_depth: int = QUADTREE_MAX_DEPTH, +) -> tuple[list[PatchInfo], int, int, int]: + """ + 
Compute an adaptive quadtree grid that subdivides dense regions. + + Starts with a uniform grid derived from tile_width_um, then recursively + subdivides patches exceeding max_transcripts_per_patch. + + Args: + image_height_px: Image height in pixels. + image_width_px: Image width in pixels. + tile_width_um: Base tile width in microns. + overlap_um: Overlap between adjacent patches in microns. + pixel_size_um: Microns per pixel. + x_coords_um: Transcript X coordinates in microns. + y_coords_um: Transcript Y coordinates in microns. + max_transcripts_per_patch: Target max transcripts per patch. + If None, auto-computed as 2x the average per initial patch. + min_tile_width_um: Minimum tile dimension before stopping. + max_depth: Maximum recursion depth. + + Returns: + Tuple of (patches, initial_rows, initial_cols, overlap_px). + """ + image_width_um = image_width_px * pixel_size_um + image_height_um = image_height_px * pixel_size_um + overlap_px = int(math.ceil(overlap_um / pixel_size_um)) + + initial_cols = max(1, math.ceil(image_width_um / tile_width_um)) + initial_rows = max(1, math.ceil(image_height_um / tile_width_um)) + + # Build prefix sum for fast counting + prefix_sum, x_edges, y_edges = _build_prefix_sum(x_coords_um, y_coords_um) + + # Define initial regions in microns + cell_width_um = image_width_um / initial_cols + cell_height_um = image_height_um / initial_rows + + initial_regions: list[tuple[float, float, float, float]] = [] + for row in range(initial_rows): + for col in range(initial_cols): + x_min = col * cell_width_um + x_max = min((col + 1) * cell_width_um, image_width_um) + y_min = row * cell_height_um + y_max = min((row + 1) * cell_height_um, image_height_um) + initial_regions.append((x_min, x_max, y_min, y_max)) + + # Auto-compute threshold + n_initial = len(initial_regions) + total_transcripts = len(x_coords_um) + if max_transcripts_per_patch is None: + max_transcripts_per_patch = max(1, int(total_transcripts / n_initial * 2)) + + # Recursive 
subdivision + final_regions = _subdivide_regions( + initial_regions, + prefix_sum, + x_edges, + y_edges, + max_transcripts_per_patch, + min_tile_width_um, + max_depth, + ) + + # Sort by (y_min, x_min) for deterministic ordering + final_regions.sort(key=lambda r: (r[2], r[0])) + + # Convert to PatchInfo + patches = _regions_to_patches( + final_regions, + overlap_um, + overlap_px, + pixel_size_um, + image_width_px, + image_height_px, + ) + + return patches, initial_rows, initial_cols, overlap_px + + +# --------------------------------------------------------------------------- +# Sparse tile merging +# --------------------------------------------------------------------------- + + +def _count_transcripts_per_tile( + patches: list[PatchInfo], + x_coords_um: np.ndarray, + y_coords_um: np.ndarray, +) -> dict[str, int]: + """ + Count transcripts falling within each patch's core bounds. + + Uses core bounds (not global) to avoid double-counting transcripts + in overlap regions. + + Args: + patches: List of PatchInfo objects. + x_coords_um: Transcript X coordinates in microns. + y_coords_um: Transcript Y coordinates in microns. + + Returns: + Dict mapping patch_id to transcript count. + """ + counts: dict[str, int] = {} + for p in patches: + cb = p.core_bounds_um + mask = ( + (x_coords_um >= cb.x_min) + & (x_coords_um < cb.x_max) + & (y_coords_um >= cb.y_min) + & (y_coords_um < cb.y_max) + ) + counts[p.patch_id] = int(np.sum(mask)) + return counts + + +def _find_adjacent_patches( + patches: list[PatchInfo], +) -> dict[str, list[str]]: + """ + Build an adjacency map: patches sharing a core bounds edge are neighbors. + + Two patches are adjacent if their core bounds share an edge (touch or + overlap along one axis while overlapping along the other axis). + + Args: + patches: List of PatchInfo objects. + + Returns: + Dict mapping patch_id to list of adjacent patch_ids. 
+ """ + adjacency: dict[str, list[str]] = {p.patch_id: [] for p in patches} + eps = 1.0 # tolerance in microns for edge sharing + + for i, a in enumerate(patches): + for j in range(i + 1, len(patches)): + b = patches[j] + ac = a.core_bounds_um + bc = b.core_bounds_um + + # Check X-axis overlap (cores overlap in X) + x_overlap = ac.x_min < bc.x_max and bc.x_min < ac.x_max + # Check Y-axis overlap (cores overlap in Y) + y_overlap = ac.y_min < bc.y_max and bc.y_min < ac.y_max + + # Adjacent along X: share a vertical edge, overlap in Y + x_touching = ( + abs(ac.x_max - bc.x_min) < eps or abs(bc.x_max - ac.x_min) < eps + ) + # Adjacent along Y: share a horizontal edge, overlap in X + y_touching = ( + abs(ac.y_max - bc.y_min) < eps or abs(bc.y_max - ac.y_min) < eps + ) + + if (x_touching and y_overlap) or (y_touching and x_overlap): + adjacency[a.patch_id].append(b.patch_id) + adjacency[b.patch_id].append(a.patch_id) + + return adjacency + + +def _recalculate_core_bounds( + patches: list[PatchInfo], + overlap_px: int, + pixel_size_um: float, + image_width_px: int, + image_height_px: int, +) -> list[PatchInfo]: + """ + Recalculate core bounds for all patches after merging. + + Core bounds are derived from the regions: the core is the + non-overlapping portion of each tile. After merging, we extract + core regions from global bounds by trimming the overlap, then + rebuild PatchInfo objects. + + For merged grids where tiles may be irregular, core bounds equal + the global bounds shrunk by half the overlap on each side that has + a neighbor, clamped to the image extent. + + Args: + patches: Current list of PatchInfo (with updated global bounds). + overlap_px: Overlap in pixels. + pixel_size_um: Microns per pixel. + image_width_px: Image width in pixels. + image_height_px: Image height in pixels. + + Returns: + New list of PatchInfo with recalculated core and global bounds. 
+ """ + if not patches: + return [] + + # Extract core regions in microns from global bounds minus overlap + half_overlap_um = (overlap_px * pixel_size_um) / 2.0 + image_width_um = image_width_px * pixel_size_um + image_height_um = image_height_px * pixel_size_um + + # Collect all core regions (global shrunk by half overlap) + core_regions_um: list[tuple[float, float, float, float]] = [] + for p in patches: + gb = p.global_bounds_um + # Shrink by half overlap on each side, but not past image edge + cx_min = gb.x_min + (half_overlap_um if gb.x_min > 0 else 0) + cx_max = gb.x_max - (half_overlap_um if gb.x_max < image_width_um else 0) + cy_min = gb.y_min + (half_overlap_um if gb.y_min > 0 else 0) + cy_max = gb.y_max - (half_overlap_um if gb.y_max < image_height_um else 0) + core_regions_um.append((cx_min, cx_max, cy_min, cy_max)) + + # Rebuild patches using core regions -> global bounds (core + overlap) + result: list[PatchInfo] = [] + for i, p in enumerate(patches): + cx_min, cx_max, cy_min, cy_max = core_regions_um[i] + + # Core bounds in pixels + core_x_min_px = max(0, min(int(round(cx_min / pixel_size_um)), image_width_px)) + core_x_max_px = max(0, min(int(round(cx_max / pixel_size_um)), image_width_px)) + core_y_min_px = max(0, min(int(round(cy_min / pixel_size_um)), image_height_px)) + core_y_max_px = max(0, min(int(round(cy_max / pixel_size_um)), image_height_px)) + + core_bounds_px = Bounds( + core_x_min_px, core_x_max_px, core_y_min_px, core_y_max_px + ) + + # Global bounds: core extended by overlap, clamped to image + global_x_min_px = max(0, core_x_min_px - overlap_px) + global_x_max_px = min(image_width_px, core_x_max_px + overlap_px) + global_y_min_px = max(0, core_y_min_px - overlap_px) + global_y_max_px = min(image_height_px, core_y_max_px + overlap_px) + + global_bounds_px = Bounds( + global_x_min_px, global_x_max_px, global_y_min_px, global_y_max_px + ) + + core_bounds_um = Bounds( + core_x_min_px * pixel_size_um, + core_x_max_px * pixel_size_um, + 
core_y_min_px * pixel_size_um, + core_y_max_px * pixel_size_um, + ) + global_bounds_um = Bounds( + global_x_min_px * pixel_size_um, + global_x_max_px * pixel_size_um, + global_y_min_px * pixel_size_um, + global_y_max_px * pixel_size_um, + ) + + result.append( + PatchInfo( + patch_id=p.patch_id, + row=p.row, + col=p.col, + global_bounds_px=global_bounds_px, + global_bounds_um=global_bounds_um, + core_bounds_px=core_bounds_px, + core_bounds_um=core_bounds_um, + ) + ) + + return result + + +def merge_sparse_tiles( + patches: list[PatchInfo], + x_coords_um: np.ndarray, + y_coords_um: np.ndarray, + overlap_px: int, + pixel_size_um: float, + image_width_px: int, + image_height_px: int, + min_transcripts: int = 1000, +) -> tuple[list[PatchInfo], int]: + """ + Merge tiles below min_transcripts into their least populated adjacent neighbor. + + Iteratively finds the sparsest tile below the threshold and merges it + into its smallest neighbor for balanced tile sizes. Repeats until no + tiles remain below the threshold (or a tile has no neighbors to merge into). + + Args: + patches: List of PatchInfo objects from grid computation. + x_coords_um: Transcript X coordinates in microns. + y_coords_um: Transcript Y coordinates in microns. + overlap_px: Overlap in pixels. + pixel_size_um: Microns per pixel. + image_width_px: Image width in pixels. + image_height_px: Image height in pixels. + min_transcripts: Minimum transcript count per tile. + + Returns: + Tuple of (merged patches, number of merges performed). 
+ """ + if len(patches) <= 1: + return patches, 0 + + # Work with mutable list + active = list(patches) + merge_count = 0 + + while True: + counts = _count_transcripts_per_tile(active, x_coords_um, y_coords_um) + adjacency = _find_adjacent_patches(active) + + # Find sparsest tile below threshold + sparse_candidates = [ + (pid, cnt) for pid, cnt in counts.items() if cnt < min_transcripts + ] + if not sparse_candidates: + break + + # Sort by count ascending to merge sparsest first + sparse_candidates.sort(key=lambda t: t[1]) + sparse_id, sparse_count = sparse_candidates[0] + + # Find neighbors and pick the least populated one for balanced merging + neighbors = adjacency.get(sparse_id, []) + if not neighbors: + # No neighbors for this tile — skip it and try next sparsest + sparse_candidates = [(pid, cnt) for pid, cnt in sparse_candidates[1:]] + found = False + for pid, cnt in sparse_candidates: + nbrs = adjacency.get(pid, []) + if nbrs: + sparse_id, sparse_count = pid, cnt + neighbors = nbrs + found = True + break + if not found: + break + + best_neighbor_id = min(neighbors, key=lambda nid: counts.get(nid, 0)) + + # Find the actual PatchInfo objects + sparse_patch = next(p for p in active if p.patch_id == sparse_id) + neighbor_patch = next(p for p in active if p.patch_id == best_neighbor_id) + + # Expand neighbor's global bounds to cover both tiles + sg = sparse_patch.global_bounds_um + ng = neighbor_patch.global_bounds_um + merged_global_um = Bounds( + x_min=min(sg.x_min, ng.x_min), + x_max=max(sg.x_max, ng.x_max), + y_min=min(sg.y_min, ng.y_min), + y_max=max(sg.y_max, ng.y_max), + ) + + # Also merge core bounds (union) + sc = sparse_patch.core_bounds_um + nc = neighbor_patch.core_bounds_um + merged_core_um = Bounds( + x_min=min(sc.x_min, nc.x_min), + x_max=max(sc.x_max, nc.x_max), + y_min=min(sc.y_min, nc.y_min), + y_max=max(sc.y_max, nc.y_max), + ) + + # Convert merged bounds to pixels + merged_global_px = Bounds( + x_min=max(0, int(round(merged_global_um.x_min / 
pixel_size_um))), + x_max=min( + image_width_px, int(round(merged_global_um.x_max / pixel_size_um)) + ), + y_min=max(0, int(round(merged_global_um.y_min / pixel_size_um))), + y_max=min( + image_height_px, int(round(merged_global_um.y_max / pixel_size_um)) + ), + ) + merged_core_px = Bounds( + x_min=max(0, int(round(merged_core_um.x_min / pixel_size_um))), + x_max=min(image_width_px, int(round(merged_core_um.x_max / pixel_size_um))), + y_min=max(0, int(round(merged_core_um.y_min / pixel_size_um))), + y_max=min( + image_height_px, int(round(merged_core_um.y_max / pixel_size_um)) + ), + ) + + # Create merged patch (keeps absorbing tile's ID and position) + merged_patch = PatchInfo( + patch_id=neighbor_patch.patch_id, + row=neighbor_patch.row, + col=neighbor_patch.col, + global_bounds_px=merged_global_px, + global_bounds_um=merged_global_um, + core_bounds_px=merged_core_px, + core_bounds_um=merged_core_um, + ) + + # Replace neighbor with merged patch and remove sparse tile + active = [ + merged_patch if p.patch_id == best_neighbor_id else p + for p in active + if p.patch_id != sparse_id + ] + merge_count += 1 + + print( + f" Merged {sparse_id} ({sparse_count:,} transcripts) " + f"into {best_neighbor_id} ({counts[best_neighbor_id]:,} transcripts)" + ) + + if merge_count > 0: + # Recalculate core bounds for consistency + active = _recalculate_core_bounds( + active, overlap_px, pixel_size_um, image_width_px, image_height_px + ) + + return active, merge_count + + +# --------------------------------------------------------------------------- +# Transcript division +# --------------------------------------------------------------------------- + + +def _filter_and_write_patch_transcripts( + full_table: pa.Table, + output_path: Path, + bounds_um: Bounds, + origin_x: float, + origin_y: float, +) -> int: + """ + Filter transcripts to a spatial region and write to parquet. 
+ + Transcripts are filtered to global_bounds (including overlap), then + coordinates are offset by subtracting the global_bounds origin. + + Args: + full_table: Full transcript table as a pyarrow Table. + output_path: Path for the filtered output parquet. + bounds_um: Spatial bounding box for filtering (microns). + origin_x: X offset to subtract for local coordinates. + origin_y: Y offset to subtract for local coordinates. + + Returns: + Number of transcripts written. + """ + x_col = full_table.column("x_location") + y_col = full_table.column("y_location") + + mask = pc.and_( + pc.and_( + pc.greater_equal(x_col, pa.scalar(bounds_um.x_min, type=x_col.type)), + pc.less(x_col, pa.scalar(bounds_um.x_max, type=x_col.type)), + ), + pc.and_( + pc.greater_equal(y_col, pa.scalar(bounds_um.y_min, type=y_col.type)), + pc.less(y_col, pa.scalar(bounds_um.y_max, type=y_col.type)), + ), + ) + filtered = full_table.filter(mask) + + if origin_x != 0.0 or origin_y != 0.0: + fx = filtered.column("x_location") + fy = filtered.column("y_location") + x_local = pc.subtract(fx, pa.scalar(origin_x, type=fx.type)) + y_local = pc.subtract(fy, pa.scalar(origin_y, type=fy.type)) + idx_x = filtered.schema.get_field_index("x_location") + idx_y = filtered.schema.get_field_index("y_location") + filtered = filtered.set_column(idx_x, "x_location", x_local) + filtered = filtered.set_column(idx_y, "y_location", y_local) + + output_path.parent.mkdir(parents=True, exist_ok=True) + pq.write_table(filtered, str(output_path)) + return len(filtered) + + +def _process_patch( + patch: PatchInfo, + output_dir: Path, + full_table: pa.Table, +) -> int: + """ + Write transcript subset for a single patch. + + Args: + patch: Patch metadata. + output_dir: Root output directory. + full_table: Full transcript table. + + Returns: + Number of transcripts written. 
+ """ + patch_dir = output_dir / patch.patch_id + bounds_um = patch.global_bounds_um + return _filter_and_write_patch_transcripts( + full_table, + patch_dir / "transcripts.parquet", + bounds_um, + origin_x=bounds_um.x_min, + origin_y=bounds_um.y_min, + ) + + +# --------------------------------------------------------------------------- +# JSON serialization +# --------------------------------------------------------------------------- + + +def _bounds_to_dict(b: Bounds) -> dict[str, float]: + """Serialize a Bounds to a JSON-compatible dict.""" + return {"x_min": b.x_min, "x_max": b.x_max, "y_min": b.y_min, "y_max": b.y_max} + + +def save_grid_metadata( + patches: list[PatchInfo], + image_height_px: int, + image_width_px: int, + pixel_size_um: float, + transcript_extent_um: Bounds, + grid_rows: int, + grid_cols: int, + overlap_um: float, + overlap_px: int, + grid_type: str, + output_path: Path, +) -> None: + """ + Serialize grid metadata to JSON. + + Args: + patches: List of PatchInfo objects. + image_height_px: Image height in pixels. + image_width_px: Image width in pixels. + pixel_size_um: Microns per pixel. + transcript_extent_um: Bounding box of transcript coordinates. + grid_rows: Number of rows in the initial grid. + grid_cols: Number of columns in the initial grid. + overlap_um: Overlap in microns. + overlap_px: Overlap in pixels. + grid_type: Grid type string ("uniform" or "density_quadtree"). + output_path: Path to write JSON file. 
+ """ + data = { + "version": "1.0", + "bundle_path": "", + "image_height_px": image_height_px, + "image_width_px": image_width_px, + "pixel_size_um": pixel_size_um, + "transcript_extent_um": _bounds_to_dict(transcript_extent_um), + "grid_rows": grid_rows, + "grid_cols": grid_cols, + "overlap_um": overlap_um, + "overlap_px": overlap_px, + "grid_type": grid_type, + "patches": [ + { + "patch_id": p.patch_id, + "row": p.row, + "col": p.col, + "global_bounds_px": _bounds_to_dict(p.global_bounds_px), + "global_bounds_um": _bounds_to_dict(p.global_bounds_um), + "core_bounds_px": _bounds_to_dict(p.core_bounds_px), + "core_bounds_um": _bounds_to_dict(p.core_bounds_um), + } + for p in patches + ], + } + output_path.parent.mkdir(parents=True, exist_ok=True) + with open(output_path, "w") as f: + json.dump(data, f, indent=2) + + +# --------------------------------------------------------------------------- +# Coordinate shift helper +# --------------------------------------------------------------------------- + + +def _shift_patches_to_real_coords( + patches: list[PatchInfo], + ox: float, + oy: float, +) -> list[PatchInfo]: + """ + Shift patch micron bounds by (ox, oy) to align with real transcript coords. + + Pixel bounds remain zero-origin (there is no real image to index into). + + Args: + patches: Patches in zero-origin micron space. + ox: X offset (transcript extent x_min). + oy: Y offset (transcript extent y_min). + + Returns: + New list of PatchInfo with shifted micron bounds. 
+ """ + shifted: list[PatchInfo] = [] + for p in patches: + gu = p.global_bounds_um + cu = p.core_bounds_um + shifted.append( + PatchInfo( + patch_id=p.patch_id, + row=p.row, + col=p.col, + global_bounds_px=p.global_bounds_px, + global_bounds_um=Bounds( + gu.x_min + ox, gu.x_max + ox, gu.y_min + oy, gu.y_max + oy + ), + core_bounds_px=p.core_bounds_px, + core_bounds_um=Bounds( + cu.x_min + ox, cu.x_max + ox, cu.y_min + oy, cu.y_max + oy + ), + ) + ) + return shifted + + +# --------------------------------------------------------------------------- +# Main divide logic +# --------------------------------------------------------------------------- + + +def divide_transcripts( + transcripts_path: Path, + output_dir: Path, + image_width_px: int, + image_height_px: int, + tile_width_um: float, + overlap_um: float, + balanced: bool, + pixel_size_um: float = XENIUM_PIXEL_SIZE_UM, + max_workers: int | None = None, + min_transcripts: int = 1000, +) -> None: + """ + Divide transcripts into overlapping spatial patches. + + Reads the transcript table once, computes a grid, merges sparse tiles + into neighbors, and writes per-patch parquet files with coordinates + offset to patch-local space. + + Args: + transcripts_path: Path to transcripts.parquet. + output_dir: Output directory for patches. + image_width_px: Image width in pixels. + image_height_px: Image height in pixels. + tile_width_um: Tile width in microns. + overlap_um: Overlap between adjacent patches in microns. + balanced: If True, use density quadtree mode. + pixel_size_um: Microns per pixel. + max_workers: Maximum threads for parallel patch writes. + min_transcripts: Minimum transcripts per tile; sparse tiles merged + into neighbors. Set to 0 to disable merging. 
+ """ + output_dir.mkdir(parents=True, exist_ok=True) + + # Read full transcript table + full_table = pq.read_table(str(transcripts_path)) + n_total = len(full_table) + print(f"Read {n_total:,} transcripts from {transcripts_path}") + + # Compute transcript extent + x_col = full_table.column("x_location") + y_col = full_table.column("y_location") + extent_um = Bounds( + x_min=pc.min(x_col).as_py(), + x_max=pc.max(x_col).as_py(), + y_min=pc.min(y_col).as_py(), + y_max=pc.max(y_col).as_py(), + ) + print( + f"Transcript extent: " + f"x=[{extent_um.x_min:.1f}, {extent_um.x_max:.1f}] " + f"y=[{extent_um.y_min:.1f}, {extent_um.y_max:.1f}] um" + ) + + # Build grid in zero-origin space when transcripts have a positive offset. + # The grid functions work in pixel space starting at (0, 0). We shift + # micron bounds back to real coordinates afterward. + ox = extent_um.x_min + oy = extent_um.y_min + + if balanced: + # Shift coordinates to zero-origin for density computation + x_coords = x_col.to_numpy() - ox + y_coords = y_col.to_numpy() - oy + + patches, grid_rows, grid_cols, overlap_px = compute_density_quadtree_grid( + image_height_px=image_height_px, + image_width_px=image_width_px, + tile_width_um=tile_width_um, + overlap_um=overlap_um, + pixel_size_um=pixel_size_um, + x_coords_um=x_coords, + y_coords_um=y_coords, + ) + grid_type = "density_quadtree" + else: + patches, grid_rows, grid_cols, overlap_px = compute_tilewidth_uniform_grid( + image_height_px=image_height_px, + image_width_px=image_width_px, + tile_width_um=tile_width_um, + overlap_um=overlap_um, + pixel_size_um=pixel_size_um, + transcript_extent_um=extent_um, + ) + grid_type = "uniform" + + # Merge sparse tiles into neighbors + n_before_merge = len(patches) + if min_transcripts > 0 and len(patches) > 1: + # Coordinates for counting: use zero-origin if not already + if balanced: + merge_x = x_coords + merge_y = y_coords + else: + merge_x = x_col.to_numpy() - ox + merge_y = y_col.to_numpy() - oy + + patches, 
n_merged = merge_sparse_tiles( + patches=patches, + x_coords_um=merge_x, + y_coords_um=merge_y, + overlap_px=overlap_px, + pixel_size_um=pixel_size_um, + image_width_px=image_width_px, + image_height_px=image_height_px, + min_transcripts=min_transcripts, + ) + if n_merged > 0: + grid_type = f"{grid_type}+merged" + print( + f"Merged {n_merged} sparse tiles: " + f"{n_before_merge} -> {len(patches)} patches" + ) + + # Shift micron bounds to real transcript coordinates + if ox != 0.0 or oy != 0.0: + patches = _shift_patches_to_real_coords(patches, ox, oy) + + print( + f"Grid: {grid_type}, {grid_rows}x{grid_cols} initial, " + f"{len(patches)} patches, overlap={overlap_um} um" + ) + + # Write patches in parallel + n_patches = len(patches) + workers = ( + max_workers if max_workers is not None else min(n_patches, os.cpu_count() or 1) + ) + + with ThreadPoolExecutor(max_workers=workers) as pool: + futures = [ + pool.submit(_process_patch, patch, output_dir, full_table) + for patch in patches + ] + for i, future in enumerate(futures): + count = future.result() + print(f" {patches[i].patch_id}: {count:,} transcripts") + + # Save grid metadata + save_grid_metadata( + patches=patches, + image_height_px=image_height_px, + image_width_px=image_width_px, + pixel_size_um=pixel_size_um, + transcript_extent_um=extent_um, + grid_rows=grid_rows, + grid_cols=grid_cols, + overlap_um=overlap_um, + overlap_px=overlap_px, + grid_type=grid_type, + output_path=output_dir / "patch_grid.json", + ) + print(f"Grid metadata saved to {output_dir / 'patch_grid.json'}") + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + + +def parse_args(argv: list[str] | None = None) -> argparse.Namespace: + """ + Parse command-line arguments. + + Args: + argv: Argument list (defaults to sys.argv[1:]). + + Returns: + Parsed arguments namespace. 
+ """ + parser = argparse.ArgumentParser( + description="Divide Xenium transcripts.parquet into spatial patches.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "--transcripts", + type=Path, + required=True, + help="Path to transcripts.parquet", + ) + parser.add_argument( + "--output", + type=Path, + required=True, + help="Output directory for patches", + ) + parser.add_argument( + "--tile-width", + type=float, + default=2000.0, + help="Tile width in microns", + ) + parser.add_argument( + "--overlap", + type=float, + default=50.0, + help="Overlap between patches in microns", + ) + parser.add_argument( + "--balanced", + action="store_true", + help="Enable density quadtree mode (subdivides dense tiles)", + ) + parser.add_argument( + "--image-width", + type=int, + required=True, + help="Image width in pixels", + ) + parser.add_argument( + "--image-height", + type=int, + required=True, + help="Image height in pixels", + ) + parser.add_argument( + "--pixel-size", + type=float, + default=XENIUM_PIXEL_SIZE_UM, + help="Pixel size in microns", + ) + parser.add_argument( + "--min-transcripts", + type=int, + default=1000, + help="Minimum transcripts per tile; sparse tiles are merged into neighbors", + ) + parser.add_argument( + "--max-workers", + type=int, + default=None, + help="Maximum threads for parallel writes", + ) + return parser.parse_args(argv) + + +def main(argv: list[str] | None = None) -> None: + """Entry point.""" + args = parse_args(argv) + + divide_transcripts( + transcripts_path=args.transcripts, + output_dir=args.output, + image_width_px=args.image_width, + image_height_px=args.image_height, + tile_width_um=args.tile_width, + overlap_um=args.overlap, + balanced=args.balanced, + pixel_size_um=args.pixel_size, + max_workers=args.max_workers, + min_transcripts=args.min_transcripts, + ) + + +if __name__ == "__main__": + main() diff --git a/bin/ficture_preprocess.py b/bin/ficture_preprocess.py new file mode 100755 index 
00000000..2e0c687c --- /dev/null +++ b/bin/ficture_preprocess.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python3 +"""Preprocess Xenium transcripts for FICTURE analysis.""" + +import argparse +import gzip +import logging +import os +import re +import sys + +import pandas as pd + + +def parse_args(): + """Parse command-line arguments.""" + parser = argparse.ArgumentParser( + description="Preprocess Xenium transcripts for FICTURE" + ) + parser.add_argument( + "--transcripts", required=True, help="Path to transcripts file (CSV)" + ) + parser.add_argument( + "--features", default="", help="Path to features file (optional)" + ) + parser.add_argument( + "--negative-control-regex", default="", help="Regex for negative control probes" + ) + return parser.parse_args() + + +def main(): + """Run FICTURE preprocessing.""" + args = parse_args() + print("[START]") + + negctrl_regex = "BLANK|NegCon" + if args.negative_control_regex: + negctrl_regex = args.negative_control_regex + + unit_info = ["X", "Y", "gene", "cell_id", "overlaps_nucleus"] + oheader = unit_info + ["Count"] + + feature = pd.DataFrame() + xmin = sys.maxsize + xmax = 0 + ymin = sys.maxsize + ymax = 0 + + output = "processed_transcripts.tsv.gz" + feature_file = "feature.clean.tsv.gz" + min_phred_score = 15 + + with gzip.open(output, "wt") as wf: + wf.write("\t".join(oheader) + "\n") + + for chunk in pd.read_csv(args.transcripts, header=0, chunksize=500000): + chunk = chunk.loc[(chunk.qv > min_phred_score)] + chunk.rename(columns={"feature_name": "gene"}, inplace=True) + if negctrl_regex != "": + chunk = chunk[ + ~chunk.gene.str.contains(negctrl_regex, flags=re.IGNORECASE, regex=True) + ] + chunk.rename(columns={"x_location": "X", "y_location": "Y"}, inplace=True) + chunk["Count"] = 1 + chunk[oheader].to_csv( + output, sep="\t", mode="a", index=False, header=False, float_format="%.2f" + ) + logging.info(f"{chunk.shape[0]}") + feature = pd.concat( + [feature, chunk.groupby(by="gene").agg({"Count": "sum"}).reset_index()] + 
) + x0 = chunk.X.min() + x1 = chunk.X.max() + y0 = chunk.Y.min() + y1 = chunk.Y.max() + xmin = min(int(xmin), int(x0)) + xmax = max(int(xmax), int(x1)) + ymin = min(int(ymin), int(y0)) + ymax = max(int(ymax), int(y1)) + + if os.path.exists(args.features): + feature_list = [] + with open(args.features, "r") as ff: + for line in ff: + feature_list.append(line.strip("\n")) + feature = feature.groupby(by="gene").agg({"Count": "sum"}).reset_index() + feature = feature[[x in feature_list for x in feature["gene"]]] + feature.to_csv(feature_file, sep="\t", index=False) + + f = os.path.join(os.path.dirname(output), "coordinate_minmax.tsv") + with open(f, "w") as wf: + wf.write(f"xmin\t{xmin}\n") + wf.write(f"xmax\t{xmax}\n") + wf.write(f"ymin\t{ymin}\n") + wf.write(f"ymax\t{ymax}\n") + + print("[FINISH]") + + +if __name__ == "__main__": + main() diff --git a/bin/segger_create_dataset.py b/bin/segger_create_dataset.py new file mode 100755 index 00000000..c031427d --- /dev/null +++ b/bin/segger_create_dataset.py @@ -0,0 +1,253 @@ +#!/usr/bin/env python3 +""" +Run segger create_dataset with spatialaxe-specific preprocessing and workarounds. + +Wraps segger's create_dataset_fast.py with: + - bundle_local symlink prep (handles read-only S3/Fusion mounts) + - parquet column statistics (segger needs these) + - WORKAROUND: filter trainable tiles from test_tiles when segger commit 0787167 mis-splits + - WORKAROUND: replace NaN bd.x with zeros after get_polygon_props produces NaN + +Each WORKAROUND should be removable when the upstream segger bug is fixed. 
+""" + +import argparse +import os +import shutil +import subprocess +import sys +from pathlib import Path + +# imports for actual work (used in functions below) +import pyarrow.parquet as pq +import pyarrow.compute as pc +import torch + + +SEGGER_CLI = "/workspace/segger_dev/src/segger/cli/create_dataset_fast.py" + + +def parse_args(): + p = argparse.ArgumentParser() + p.add_argument("--bundle-dir", required=True) + p.add_argument("--output-dir", required=True) + p.add_argument("--sample-type", required=True, choices=["xenium"]) + p.add_argument("--tile-width", type=int, required=True) + p.add_argument("--tile-height", type=int, required=True) + p.add_argument("--n-workers", type=int, required=True) + # remaining args forwarded to segger CLI + args, extra = p.parse_known_args() + return args, extra + + +def prepare_bundle(bundle_dir): + """Create local bundle dir with absolute symlinks (S3/Fusion read-only-safe).""" + Path("bundle_local").mkdir(exist_ok=True) + for item in Path(bundle_dir).iterdir(): + try: + abs_path = item.resolve() + except Exception: + abs_path = item + target = Path("bundle_local") / item.name + if target.exists() or target.is_symlink(): + target.unlink() + target.symlink_to(abs_path) + + # Segger expects nucleus_boundaries.parquet but Xenium bundles have cell_boundaries.parquet + nb = Path("bundle_local/nucleus_boundaries.parquet") + cb = Path("bundle_local/cell_boundaries.parquet") + if not nb.exists() and cb.exists(): + print( + "Creating nucleus_boundaries.parquet symlink from cell_boundaries.parquet" + ) + nb.symlink_to(cb.resolve()) + + print("Bundle contents:") + for item in sorted(Path("bundle_local").iterdir()): + print(f" {item.name}") + + +def add_parquet_stats(): + """Rewrite key parquet files with column statistics (segger requires them).""" + Path("bundle_stats").mkdir(exist_ok=True) + for fname in ["transcripts.parquet", "nucleus_boundaries.parquet"]: + src = Path("bundle_local") / fname + dst = Path("bundle_stats") / fname + 
if not src.exists(): + print(f" Skip {src}") + continue + t = pq.read_table(str(src)) + pq.write_table(t, str(dst), write_statistics=True, compression="snappy") + print(f" Done {fname} ({len(t)} rows)") + + # Symlink everything else from bundle_local into bundle_stats + for item in Path("bundle_local").iterdir(): + dst = Path("bundle_stats") / item.name + if not dst.exists(): + dst.symlink_to(item.resolve()) + + # Debug: check overlaps_nucleus column in transcripts + print("\n=== Debugging overlaps_nucleus data ===") + tx = pq.read_table("bundle_stats/transcripts.parquet") + bd = pq.read_table("bundle_stats/nucleus_boundaries.parquet") + if "overlaps_nucleus" in tx.column_names: + col = tx.column("overlaps_nucleus") + print(f"overlaps_nucleus dtype: {col.type}") + unique_vals = pc.unique(col) + print(f"overlaps_nucleus unique values: {unique_vals.to_pylist()[:10]}") + val_counts = pc.value_counts(col) + print(f"overlaps_nucleus value_counts: {val_counts.to_pylist()}") + else: + print("WARNING: overlaps_nucleus column NOT FOUND in transcripts.parquet") + + if "cell_id" in tx.column_names and "cell_id" in bd.column_names: + tx_cells = set(pc.unique(tx.column("cell_id")).to_pylist()) + bd_cells = set(pc.unique(bd.column("cell_id")).to_pylist()) + overlap = tx_cells & bd_cells + print(f"Transcripts unique cell_ids: {len(tx_cells)}") + print(f"Boundaries unique cell_ids: {len(bd_cells)}") + print(f"Overlapping cell_ids: {len(overlap)}") + print("=== End Debug ===\n") + + +def run_segger_cli(args, extra): + cmd = [ + "python3", + SEGGER_CLI, + "--base_dir", + "bundle_stats", + "--data_dir", + args.output_dir, + "--sample_type", + args.sample_type, + "--tile_width", + str(args.tile_width), + "--tile_height", + str(args.tile_height), + "--n_workers", + str(args.n_workers), + *extra, + ] + print(f"Running: {' '.join(cmd)}") + result = subprocess.run(cmd) + if result.returncode != 0: + sys.exit(result.returncode) + + +def filter_trainable_tiles_if_needed(prefix): + """ + 
WORKAROUND: segger commit 0787167 has a bug where all tiles end up in test_tiles + regardless of test_prob/val_prob settings. Move ONLY trainable tiles (those with + edge_label_index) from test_tiles to train_tiles. + + Remove this function once segger >= 0.1.x is bumped with the upstream fix. + """ + train_dir = Path(prefix) / "train_tiles" / "processed" + test_dir = Path(prefix) / "test_tiles" / "processed" + val_dir = Path(prefix) / "val_tiles" / "processed" + + train_count = len(list(train_dir.iterdir())) if train_dir.exists() else 0 + test_count = len(list(test_dir.iterdir())) if test_dir.exists() else 0 + val_count = len(list(val_dir.iterdir())) if val_dir.exists() else 0 + print( + f"Dataset split (before fix): train={train_count} val={val_count} test={test_count}" + ) + + if train_count == 0 and test_count > 0: + print( + "Applying workaround: filtering trainable tiles from test_tiles (segger split bug)" + ) + moved = 0 + skipped = 0 + for tile_path in list(test_dir.iterdir()): + if not tile_path.name.endswith(".pt"): + continue + try: + tile = torch.load(str(tile_path), weights_only=False) + edge_store = tile["tx", "belongs", "bd"] + if ( + hasattr(edge_store, "edge_label_index") + and edge_store.edge_label_index.numel() > 0 + ): + shutil.move(str(tile_path), str(train_dir / tile_path.name)) + moved += 1 + else: + skipped += 1 + except Exception as e: + print(f"Warning: Could not process {tile_path.name}: {e}") + skipped += 1 + print(f"Moved {moved} trainable tiles to train_tiles") + print(f"Skipped {skipped} test-only tiles (no edge_label_index)") + + train_count = len(list(train_dir.iterdir())) if train_dir.exists() else 0 + test_count = len(list(test_dir.iterdir())) if test_dir.exists() else 0 + val_count = len(list(val_dir.iterdir())) if val_dir.exists() else 0 + print( + f"Dataset split (after fix): train={train_count} val={val_count} test={test_count}" + ) + + if train_count == 0: + print(f"ERROR: No trainable tiles were created in {train_dir}", 
file=sys.stderr) + print( + "This usually means no transcripts overlap with nucleus boundaries in the dataset.", + file=sys.stderr, + ) + print( + "Check if the Xenium bundle contains valid overlaps_nucleus data in transcripts.parquet.", + file=sys.stderr, + ) + sys.exit(1) + print(f"Successfully created {train_count} trainable tiles") + + +def fix_bd_x_nan(prefix): + """ + WORKAROUND: segger's get_polygon_props() produces NaN boundary features (bd.x) + when polygon geometries have zero area or index misalignment during GeoDataFrame + construction. Replace NaN bd.x with zeros so BCEWithLogitsLoss doesn't propagate NaN. + + Remove this function once segger >= 0.1.x is bumped with the upstream fix. + """ + fixed = 0 + total = 0 + for split in ["train_tiles", "test_tiles", "val_tiles"]: + tile_dir = Path(prefix) / split / "processed" + if not tile_dir.is_dir(): + continue + for tile_path in tile_dir.iterdir(): + if not tile_path.name.endswith(".pt"): + continue + total += 1 + tile = torch.load(str(tile_path), weights_only=False) + bd_x = tile["bd"].x + if bd_x.isnan().any(): + tile["bd"].x = torch.nan_to_num(bd_x, nan=0.0) + torch.save(tile, str(tile_path)) + fixed += 1 + print(f"Fixed NaN bd.x in {fixed}/{total} tiles") + + +def main(): + args, extra = parse_args() + + # Ensure numba cache dir is writable (env var should be set by caller, but belt-and-suspenders) + os.environ.setdefault("NUMBA_CACHE_DIR", os.path.join(os.getcwd(), ".numba_cache")) + os.makedirs(os.environ["NUMBA_CACHE_DIR"], exist_ok=True) + + prepare_bundle(args.bundle_dir) + print("Adding statistics to parquet files...") + add_parquet_stats() + + # Sanity-check bundle_stats + print("bundle_stats contents:") + for item in sorted(Path("bundle_stats").iterdir()): + print(f" {item.name}") + + run_segger_cli(args, extra) + + filter_trainable_tiles_if_needed(args.output_dir) + fix_bd_x_nan(args.output_dir) + + +if __name__ == "__main__": + main() diff --git a/bin/segger_predict.py b/bin/segger_predict.py 
new file mode 100755 index 00000000..b3e1f289 --- /dev/null +++ b/bin/segger_predict.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python3 +""" +Run segger predict with spatialaxe-specific preprocessing. + +Wraps segger's predict_fast.py with: + - GPU enumeration (replaces inline python3 -c torch check) + - WORKAROUND: patch predict_parquet.py at runtime to add torch.no_grad() for ~30-50% VRAM savings + - WORKAROUND: seed random.choice for deterministic GPU assignment (avoids stochastic OOM) + +Both WORKAROUNDs should be removable once the patches are upstreamed to segger. +""" + +import argparse +import os +import subprocess +import sys + + +SEGGER_CLI = "/workspace/segger_dev/src/segger/cli/predict_fast.py" + + +def parse_args(): + p = argparse.ArgumentParser() + p.add_argument("--models-dir", required=True) + p.add_argument("--segger-data-dir", required=True) + p.add_argument("--transcripts-file", required=True) + p.add_argument("--benchmarks-dir", required=True) + p.add_argument("--batch-size", type=int, required=True) + p.add_argument("--use-cc", required=True) + p.add_argument("--knn-method", required=True) + p.add_argument("--num-workers", type=int, required=True) + args, extra = p.parse_known_args() + return args, extra + + +def detect_gpus(): + """Return comma-separated list of available CUDA device ids (or "0" if none).""" + import torch + + print("=== GPU Detection (SEGGER_PREDICT) ===") + print(f"PyTorch CUDA available: {torch.cuda.is_available()}") + n = torch.cuda.device_count() + print(f"CUDA device count: {n}") + print("======================================") + if n > 0: + return ",".join(str(i) for i in range(n)) + return "0" + + +def patch_predict_parquet(): + """ + WORKAROUND: patch segger.prediction.predict_parquet at runtime. + + Avoids rebuilding the segger Docker image. Two patches: + 1. Add torch.no_grad() to disable gradient graphs during inference (~30-50% VRAM savings). + 2. Seed random for deterministic GPU assignment (avoids stochastic OOM). 
+ + Remove this function once the patches are upstreamed to segger. + """ + import segger.prediction.predict_parquet as m + + pred_py = m.__file__ + print(f"Patching {pred_py}: torch.no_grad() + round-robin GPU assignment") + # Use sed via subprocess for in-place edit (matches the original behavior exactly) + subprocess.run( + [ + "sed", + "-i", + "s/with cp.cuda.Device(gpu_id):/with cp.cuda.Device(gpu_id), torch.no_grad():/", + pred_py, + ], + check=True, + ) + subprocess.run( + [ + "sed", + "-i", + "s/gpu_id = random.choice(gpu_ids)/random.seed(0); gpu_id = random.choice(gpu_ids)/", + pred_py, + ], + check=True, + ) + + +def run_segger_cli(args, extra, gpu_ids): + cmd = [ + "python3", + SEGGER_CLI, + "--models_dir", + args.models_dir, + "--segger_data_dir", + args.segger_data_dir, + "--transcripts_file", + args.transcripts_file, + "--benchmarks_dir", + args.benchmarks_dir, + "--batch_size", + str(args.batch_size), + "--use_cc", + str(args.use_cc), + "--knn_method", + args.knn_method, + "--num_workers", + str(args.num_workers), + "--gpu_ids", + gpu_ids, + *extra, + ] + print(f"Running: {' '.join(cmd)}") + result = subprocess.run(cmd) + if result.returncode != 0: + sys.exit(result.returncode) + + +def main(): + args, extra = parse_args() + + # Limit cupy GPU memory to 80% so PyTorch has headroom for graph attention ops + os.environ.setdefault("CUPY_GPU_MEMORY_LIMIT", "80%") + # Belt-and-suspenders: ensure PyTorch uses expandable segments + os.environ.setdefault( + "PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True,max_split_size_mb:512" + ) + # Numba cache directory + os.environ.setdefault("NUMBA_CACHE_DIR", os.path.join(os.getcwd(), ".numba_cache")) + os.makedirs(os.environ["NUMBA_CACHE_DIR"], exist_ok=True) + + gpu_ids = detect_gpus() + print(f"Using GPUs: {gpu_ids}") + + patch_predict_parquet() + + run_segger_cli(args, extra, gpu_ids) + + +if __name__ == "__main__": + main() diff --git a/bin/spatialdata_merge.py b/bin/spatialdata_merge.py new file mode 100755 
index 00000000..5359a7b3 --- /dev/null +++ b/bin/spatialdata_merge.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python3 +"""Merge two spatialdata bundles to create a layered spatialdata object.""" + +import argparse +import json +import os +import shutil + + +def parse_args(): + """Parse command-line arguments.""" + parser = argparse.ArgumentParser(description="Merge two spatialdata bundles") + parser.add_argument("--raw-bundle", required=True, help="Path to raw spatialdata bundle") + parser.add_argument("--redefined-bundle", required=True, help="Path to redefined spatialdata bundle") + parser.add_argument("--prefix", required=True, help="Output prefix (sample ID)") + parser.add_argument("--output-folder", required=True, help="Output folder name") + return parser.parse_args() + + +def main(): + """Run spatialdata merge.""" + args = parse_args() + print("[START]") + + output_dir = f"spatialdata/{args.prefix}/{args.output_folder}" + + # Ensure the output folder exists + if os.path.exists(output_dir): + shutil.rmtree(output_dir) + os.makedirs(output_dir) + + # Copy the entire reference bundle as is + for root, _, files in os.walk(args.raw_bundle): + rel_path = os.path.relpath(root, args.raw_bundle) + target_path = os.path.join(output_dir, rel_path) + os.makedirs(target_path, exist_ok=True) + for file in files: + shutil.copy(os.path.join(root, file), os.path.join(target_path, file)) + + # Rename folders in Points, Shapes, and Tables to raw_* + for category in ["points", "shapes", "tables"]: + category_path = os.path.join(output_dir, category) + if os.path.exists(category_path): + for folder in next(os.walk(category_path))[1]: + old_path = os.path.join(category_path, folder) + print(folder) + new_path = os.path.join(category_path, f"raw_{folder}") + os.rename(old_path, new_path) + + # Copy folders from redefined_bundle and rename them as redefined_* + for category in ["points", "shapes", "tables"]: + add_category_path = os.path.join(args.redefined_bundle, category) + 
output_category_path = os.path.join(output_dir, category) + os.makedirs(output_category_path, exist_ok=True) + + if os.path.exists(add_category_path): + for folder in next(os.walk(add_category_path))[1]: + src_folder = os.path.join(add_category_path, folder) + dest_folder = os.path.join(output_category_path, f"redefined_{folder}") + shutil.copytree(src_folder, dest_folder) + + # Invalidate consolidated metadata in zarr.json -- the directory renames above + # made the element paths in the metadata stale (e.g., 'points/transcripts' -> + # 'points/raw_transcripts'). Without consolidated metadata, sd.read_zarr() + # discovers elements by scanning the filesystem directly. + zarr_json = os.path.join(output_dir, "zarr.json") + if os.path.exists(zarr_json): + with open(zarr_json) as f: + meta = json.load(f) + if "consolidated_metadata" in meta: + del meta["consolidated_metadata"] + with open(zarr_json, "w") as f: + json.dump(meta, f) + print("[NOTE] Removed stale consolidated metadata from zarr.json") + + print("[FINISH]") + + +if __name__ == "__main__": + main() diff --git a/bin/spatialdata_meta.py b/bin/spatialdata_meta.py new file mode 100755 index 00000000..20a9c0ef --- /dev/null +++ b/bin/spatialdata_meta.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python3 +"""Add metadata to SpatialData bundle.""" + +import argparse +import json +import sys + +import pandas as pd +import spatialdata as sd + +# Fix zarr v3 + anndata + numcodecs incompatibility: +# anndata's string writer passes numcodecs.VLenUTF8 to zarr.Group.create_array, +# but zarr v3 only accepts ArrayArrayCodec types. OME-Zarr 0.5 requires zarr v3 +# for images, so we can't downgrade the store format. Instead, we intercept +# create_array to strip numcodecs codecs and let zarr v3 handle strings natively. 
+import numcodecs +import zarr.core.group as _zarr_group + +_orig_create_array = _zarr_group.Group.create_array + + +def _v3_compat_create_array(self, *args, **kwargs): + """Strip numcodecs VLenUTF8 from codec params for zarr v3 compatibility.""" + for param in ("filters", "compressor", "object_codec"): + val = kwargs.get(param) + if val is None: + continue + if isinstance(val, numcodecs.vlen.VLenUTF8): + del kwargs[param] + elif isinstance(val, (list, tuple)): + cleaned = [v for v in val if not isinstance(v, numcodecs.vlen.VLenUTF8)] + if len(cleaned) != len(val): + if cleaned: + kwargs[param] = cleaned + else: + del kwargs[param] + return _orig_create_array(self, *args, **kwargs) + + +_zarr_group.Group.create_array = _v3_compat_create_array + + +def _is_arrow_backed(dtype): + """Check if a pandas dtype is backed by PyArrow.""" + return isinstance(dtype, pd.ArrowDtype) or ( + hasattr(dtype, "storage") and getattr(dtype, "storage", None) == "pyarrow" + ) or "pyarrow" in str(dtype) + + +def _convert_df_arrow_to_numpy(df): + """Convert Arrow-backed dtypes in a DataFrame to numpy object dtype.""" + for col in df.columns: + dtype = df[col].dtype + if _is_arrow_backed(dtype): + df[col] = df[col].astype("object") + elif isinstance(dtype, pd.CategoricalDtype): + cats = dtype.categories + if cats is not None and _is_arrow_backed(cats.dtype): + df[col] = df[col].cat.rename_categories(cats.astype("object")) + if _is_arrow_backed(df.index.dtype): + df.index = pd.Index(df.index.astype("object")) + + +def convert_arrow_to_numpy(sdata): + """Convert Arrow-backed dtypes to numpy for anndata zarr write compatibility.""" + for table_key in list(sdata.tables.keys()): + adata = sdata.tables[table_key] + _convert_df_arrow_to_numpy(adata.obs) + _convert_df_arrow_to_numpy(adata.var) + + +def parse_args(): + """Parse command-line arguments.""" + parser = argparse.ArgumentParser(description="Add metadata to SpatialData bundle") + parser.add_argument("--spatialdata-bundle", required=True, 
help="Path to spatialdata bundle") + parser.add_argument("--xenium-bundle", required=True, help="Path to xenium bundle") + parser.add_argument("--prefix", required=True, help="Output prefix (sample ID)") + parser.add_argument("--metadata", required=True, help="Metadata string from Nextflow meta map") + parser.add_argument("--output-folder", required=True, help="Output folder name") + return parser.parse_args() + + +def main(): + """Run spatialdata metadata addition.""" + args = parse_args() + print("[START]") + + sdata = sd.read_zarr(args.spatialdata_bundle) + + # Convert metadata into dict + print("[NOTE] Read in provenance ...") + metadata = args.metadata.strip("[]") # Remove square brackets + pairs = metadata.split(", ") # Split by comma and space + metadata = {k: v for k, v in (pair.split(":") for pair in pairs)} # Create dictionary + + for key in metadata: + if key not in sdata['raw_table'].uns['spatialdata_attrs']: + sdata['raw_table'].uns['spatialdata_attrs'][key] = metadata[key] + else: + print(f'[ERROR] {key} already exist in sdata[raw_table].uns[spatialdata_attrs].', file=sys.stderr) + + # Add experimental metadata + print("[NOTE] Read in experiment metadata ...") + sdata['raw_table'].uns['experiment_xenium'] = '' + metadata_experiment = f'{args.xenium_bundle}/experiment.xenium' + with open(metadata_experiment, "r") as f: + metadata_experiment = json.load(f) + sdata['raw_table'].uns['experiment_xenium'] = json.dumps(metadata_experiment) + + # Add gene panel metadata + print("[NOTE] Read in gene panel metadata ...") + sdata['raw_table'].uns['gene_panel'] = '' + metadata_gene_panel = f'{args.xenium_bundle}/gene_panel.json' + with open(metadata_gene_panel, "r") as f: + metadata_gene_panel = json.load(f) + sdata['raw_table'].uns['gene_panel'] = json.dumps(metadata_gene_panel) + + convert_arrow_to_numpy(sdata) + sdata.write(f"spatialdata/{args.prefix}/{args.output_folder}", overwrite=True, consolidate_metadata=True, sdata_formats=None) + + print("[FINISH]") + 
+ +if __name__ == "__main__": + main() diff --git a/bin/spatialdata_write.py b/bin/spatialdata_write.py new file mode 100755 index 00000000..3a4723e0 --- /dev/null +++ b/bin/spatialdata_write.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python3 +"""Write spatialdata object from segmentation format.""" + +import argparse +import sys + +import pandas as pd +from spatialdata_io import xenium + +# Fix zarr v3 + anndata + numcodecs incompatibility: +# anndata's string writer passes numcodecs.VLenUTF8 to zarr.Group.create_array, +# but zarr v3 only accepts ArrayArrayCodec types. OME-Zarr 0.5 requires zarr v3 +# for images, so we can't downgrade the store format. Instead, we intercept +# create_array to strip numcodecs codecs and let zarr v3 handle strings natively. +import numcodecs +import zarr.core.group as _zarr_group + +_orig_create_array = _zarr_group.Group.create_array + + +def _v3_compat_create_array(self, *args, **kwargs): + """Strip numcodecs VLenUTF8 from codec params for zarr v3 compatibility.""" + for param in ("filters", "compressor", "object_codec"): + val = kwargs.get(param) + if val is None: + continue + if isinstance(val, numcodecs.vlen.VLenUTF8): + del kwargs[param] + elif isinstance(val, (list, tuple)): + cleaned = [v for v in val if not isinstance(v, numcodecs.vlen.VLenUTF8)] + if len(cleaned) != len(val): + if cleaned: + kwargs[param] = cleaned + else: + del kwargs[param] + return _orig_create_array(self, *args, **kwargs) + + +_zarr_group.Group.create_array = _v3_compat_create_array + + +def _is_arrow_backed(dtype): + """Check if a pandas dtype is backed by PyArrow.""" + return ( + isinstance(dtype, pd.ArrowDtype) + or (hasattr(dtype, "storage") and getattr(dtype, "storage", None) == "pyarrow") + or "pyarrow" in str(dtype) + ) + + +def _convert_df_arrow_to_numpy(df): + """Convert Arrow-backed dtypes in a DataFrame to numpy object dtype. + + Handles three cases: + 1. Regular columns with Arrow-backed dtypes + 2. 
Categorical columns whose categories are Arrow-backed + 3. Index with Arrow-backed dtype + """ + for col in df.columns: + dtype = df[col].dtype + if _is_arrow_backed(dtype): + df[col] = df[col].astype("object") + elif isinstance(dtype, pd.CategoricalDtype): + cats = dtype.categories + if cats is not None and _is_arrow_backed(cats.dtype): + df[col] = df[col].cat.rename_categories(cats.astype("object")) + if _is_arrow_backed(df.index.dtype): + df.index = pd.Index(df.index.astype("object")) + + +def convert_arrow_to_numpy(sdata): + """Convert Arrow-backed dtypes to numpy for anndata zarr write compatibility.""" + for table_key in list(sdata.tables.keys()): + adata = sdata.tables[table_key] + _convert_df_arrow_to_numpy(adata.obs) + _convert_df_arrow_to_numpy(adata.var) + + +def parse_args(): + """Parse command-line arguments.""" + parser = argparse.ArgumentParser(description="Write spatialdata object from segmentation format") + parser.add_argument("--bundle", required=True, help="Path to input bundle") + parser.add_argument("--prefix", required=True, help="Output prefix (sample ID)") + parser.add_argument("--output-folder", required=True, help="Output folder name") + parser.add_argument("--segmented-object", required=True, help="Segmented object type (cells, nuclei, cells_and_nuclei)") + parser.add_argument("--coordinate-space", required=True, help="Coordinate space (pixels, microns)") + parser.add_argument("--format", required=True, help="Input format (xenium)") + return parser.parse_args() + + +def main(): + """Run spatialdata write.""" + args = parse_args() + print("[START]") + + cells_as_circles = False + cells_boundaries = False + nucleus_boundaries = False + cells_labels = False + nucleus_labels = False + + if args.segmented_object == "cells": + cells_boundaries = True + cells_labels = True + elif args.segmented_object == "nuclei": + nucleus_boundaries = True + nucleus_labels = True + elif args.segmented_object == "cells_and_nuclei": + cells_boundaries = True + 
nucleus_boundaries = True + cells_labels = True + nucleus_labels = True + else: + cells_as_circles = False + + # set sd variables based on the coordinate space + if args.coordinate_space == "pixels": + cells_labels = True + nucleus_labels = True + # Labels are sufficient in pixel space; boundaries can contain + # degenerate polygons (< 4 vertices) from XeniumRanger that + # crash spatialdata_io's shapely LinearRing parser. + cells_boundaries = False + nucleus_boundaries = False + + if args.coordinate_space == "microns": + cells_labels = False + cells_boundaries = True + nucleus_boundaries = False + nucleus_labels = False + cells_as_circles = False + + if args.format == "xenium": + sd_xenium_obj = xenium( + args.bundle, + cells_as_circles=cells_as_circles, + cells_boundaries=cells_boundaries, + nucleus_boundaries=nucleus_boundaries, + cells_labels=cells_labels, + nucleus_labels=nucleus_labels, + transcripts=True, + morphology_mip=True, + morphology_focus=True, + ) + print(sd_xenium_obj) + convert_arrow_to_numpy(sd_xenium_obj) + sd_xenium_obj.write(f"spatialdata/{args.prefix}/{args.output_folder}") + else: + sys.exit("[ERROR] Format not found") + + print("[FINISH]") + + +if __name__ == "__main__": + main() diff --git a/bin/stitch_transcripts.py b/bin/stitch_transcripts.py new file mode 100755 index 00000000..45f057e3 --- /dev/null +++ b/bin/stitch_transcripts.py @@ -0,0 +1,848 @@ +#!/usr/bin/env python3 +"""Stitch per-patch Baysor segmentation results into unified output. + +Standalone script that replaces the xenium_patch CLI package's stitch +functionality. Uses sopa's solve_conflicts() for overlap resolution. 
+""" + +from __future__ import annotations + +import argparse +import json +import os +from concurrent.futures import ThreadPoolExecutor +from dataclasses import dataclass +from pathlib import Path + +import geopandas as gpd +import numpy as np +import pyarrow as pa +import pyarrow.compute as pc +import pyarrow.csv as pa_csv +import shapely +from shapely.affinity import translate +from shapely.geometry import mapping, shape +from sopa.segmentation.resolve import solve_conflicts + +# --------------------------------------------------------------------------- +# Inline types (from _types.py) +# --------------------------------------------------------------------------- + + +@dataclass(frozen=True) +class Bounds: + """Axis-aligned bounding box in either pixel or micron coordinates.""" + + x_min: float + x_max: float + y_min: float + y_max: float + + +@dataclass(frozen=True) +class PatchInfo: + """Metadata for a single patch in the grid.""" + + patch_id: str + row: int + col: int + global_bounds_px: Bounds + global_bounds_um: Bounds + core_bounds_px: Bounds + core_bounds_um: Bounds + + +@dataclass +class PatchGridMetadata: + """Full grid metadata, serializable to JSON.""" + + version: str + bundle_path: str + image_height_px: int + image_width_px: int + pixel_size_um: float + transcript_extent_um: Bounds + grid_rows: int + grid_cols: int + overlap_um: float + overlap_px: int + patches: list[PatchInfo] + grid_type: str = "uniform" + + +# --------------------------------------------------------------------------- +# Internal result containers +# --------------------------------------------------------------------------- + + +@dataclass +class _PatchGeoResult: + """Result of parallel GeoJSON processing for a single patch.""" + + features: list[dict] + cell_ids: list[str] + + +@dataclass +class _PatchCsvResult: + """Result of parallel CSV reading for a single patch.""" + + table: pa.Table + has_cell_col: bool + has_x_col: bool + has_y_col: bool + has_gene_col: bool = False 
+ has_feature_name_col: bool = False + + +# --------------------------------------------------------------------------- +# Grid metadata I/O (from grid.py) +# --------------------------------------------------------------------------- + + +def _dict_to_bounds(d: dict) -> Bounds: + return Bounds(d["x_min"], d["x_max"], d["y_min"], d["y_max"]) + + +def load_grid_metadata(input_path: Path) -> PatchGridMetadata: + """Deserialize PatchGridMetadata from JSON. + + Args: + input_path: Path to JSON file to read. + + Returns: + Reconstructed PatchGridMetadata. + """ + with open(input_path) as f: + data = json.load(f) + + patches = [ + PatchInfo( + patch_id=p["patch_id"], + row=p["row"], + col=p["col"], + global_bounds_px=_dict_to_bounds(p["global_bounds_px"]), + global_bounds_um=_dict_to_bounds(p["global_bounds_um"]), + core_bounds_px=_dict_to_bounds(p["core_bounds_px"]), + core_bounds_um=_dict_to_bounds(p["core_bounds_um"]), + ) + for p in data["patches"] + ] + + return PatchGridMetadata( + version=data["version"], + bundle_path=data["bundle_path"], + image_height_px=data["image_height_px"], + image_width_px=data["image_width_px"], + pixel_size_um=data["pixel_size_um"], + transcript_extent_um=_dict_to_bounds(data["transcript_extent_um"]), + grid_rows=data["grid_rows"], + grid_cols=data["grid_cols"], + overlap_um=data["overlap_um"], + overlap_px=data["overlap_px"], + grid_type=data.get("grid_type", "uniform"), + patches=patches, + ) + + +# --------------------------------------------------------------------------- +# GeoJSON I/O (from polygon_io.py) +# --------------------------------------------------------------------------- + + +def _normalize_geometry_collection(geojson: dict) -> dict: + """Convert a GeometryCollection to a FeatureCollection. + + proseg-to-baysor produces a non-standard GeoJSON GeometryCollection where + each geometry object has a custom ``cell`` key (bare integer) instead of + using Feature wrappers. 
This normalises it to a standard FeatureCollection + with ``id`` and ``properties.cell_id`` on each feature, using the + ``"cell-{N}"`` format that matches the companion CSV. + + Args: + geojson: Parsed GeoJSON dict with type GeometryCollection. + + Returns: + Standard FeatureCollection dict. + """ + features = [] + for geom in geojson.get("geometries", []): + cell_raw = geom.get("cell", "") + cell_id = str(cell_raw) + clean_geom = {k: v for k, v in geom.items() if k != "cell"} + feature = { + "type": "Feature", + "id": cell_id, + "geometry": clean_geom, + "properties": {"cell_id": cell_id}, + } + features.append(feature) + return {"type": "FeatureCollection", "features": features} + + +def read_geojson(geojson_path: Path) -> dict: + """Read a GeoJSON file and normalise to FeatureCollection. + + Handles both standard FeatureCollections and the GeometryCollection + format produced by proseg-to-baysor. + + Args: + geojson_path: Path to the GeoJSON file. + + Returns: + Parsed GeoJSON dict (always a FeatureCollection). + """ + with open(geojson_path) as f: + data = json.load(f) + if data.get("type") == "GeometryCollection": + return _normalize_geometry_collection(data) + return data + + +def transform_polygons(geojson: dict, offset_x: float, offset_y: float) -> dict: + """Shift all polygon coordinates by (offset_x, offset_y). + + Args: + geojson: Input FeatureCollection. + offset_x: Translation in x. + offset_y: Translation in y. + + Returns: + New FeatureCollection with shifted geometries. + """ + features = [] + for feat in geojson.get("features", []): + geom = shape(feat["geometry"]) + shifted = translate(geom, xoff=offset_x, yoff=offset_y) + new_feat = {**feat, "geometry": mapping(shifted)} + features.append(new_feat) + return {"type": "FeatureCollection", "features": features} + + +def write_geojson(geojson: dict, output_path: Path) -> None: + """Write a GeoJSON FeatureCollection. + + Args: + geojson: GeoJSON dict to write. 
+ output_path: Destination path (parent dirs created automatically). + """ + output_path.parent.mkdir(parents=True, exist_ok=True) + with open(output_path, "w") as f: + json.dump(geojson, f) + + +# --------------------------------------------------------------------------- +# Arrow utilities (from _arrow_utils.py) +# --------------------------------------------------------------------------- + + +def float_str_array(f64_array: pa.Array) -> pa.Array: + """Convert a float64 pyarrow array to string using Python's str(float) format. + + pyarrow's built-in cast omits trailing '.0' for whole numbers. This + function ensures output matches str(float(...)) for CSV compatibility. + + Args: + f64_array: Float64 pyarrow array to convert. + + Returns: + String pyarrow array with Python-formatted float values. + """ + return pa.array( + [str(v) if v is not None else None for v in f64_array.to_pylist()], + type=pa.string(), + ) + + +# --------------------------------------------------------------------------- +# Parallel I/O +# --------------------------------------------------------------------------- + + +def _read_and_transform_geojson( + patch: PatchInfo, + patches_dir: Path, + geojson_filename: str, +) -> _PatchGeoResult | None: + """Read, transform GeoJSON for a single patch (no core clipping). + + Args: + patch: Patch metadata. + patches_dir: Root patches directory. + geojson_filename: GeoJSON filename within each patch directory. + + Returns: + _PatchGeoResult with features and cell IDs, or None if no GeoJSON. 
+ """ + geojson_path = patches_dir / patch.patch_id / geojson_filename + if not geojson_path.exists(): + return None + + geojson = read_geojson(geojson_path) + + offset_x = patch.global_bounds_um.x_min + offset_y = patch.global_bounds_um.y_min + geojson = transform_polygons(geojson, offset_x, offset_y) + + features = geojson.get("features", []) + seen: set[str] = set() + cell_ids: list[str] = [] + for feat in features: + old_id = str(feat.get("id", feat.get("properties", {}).get("cell_id", ""))) + if old_id not in seen: + seen.add(old_id) + cell_ids.append(old_id) + + return _PatchGeoResult(features=features, cell_ids=cell_ids) + + +def _read_patch_csv( + patch: PatchInfo, + patches_dir: Path, + csv_filename: str, +) -> _PatchCsvResult | None: + """Read a patch CSV into a pyarrow Table. + + All columns are read as strings to preserve exact formatting. + + Args: + patch: Patch metadata. + patches_dir: Root patches directory. + csv_filename: CSV filename within each patch directory. + + Returns: + _PatchCsvResult with the table and column presence flags, or None. 
+ """ + csv_path = patches_dir / patch.patch_id / csv_filename + if not csv_path.exists(): + return None + + with open(csv_path) as fh: + header_line = fh.readline().strip() + col_names = header_line.split(",") + all_string_types = {name: pa.string() for name in col_names} + + table = pa_csv.read_csv( + csv_path, + convert_options=pa_csv.ConvertOptions( + column_types=all_string_types, + strings_can_be_null=False, + ), + read_options=pa_csv.ReadOptions(use_threads=True), + ) + + return _PatchCsvResult( + table=table, + has_cell_col="cell" in table.column_names, + has_x_col="x" in table.column_names, + has_y_col="y" in table.column_names, + has_gene_col="gene" in table.column_names, + has_feature_name_col="feature_name" in table.column_names, + ) + + +# --------------------------------------------------------------------------- +# CSV processing +# --------------------------------------------------------------------------- + + +def _transform_patch_coords( + csv_result: _PatchCsvResult, + offset_x: float, + offset_y: float, +) -> pa.Table: + """Shift transcript coordinates from local patch space to global space. + + Args: + csv_result: The raw CSV table and column flags. + offset_x: X offset for coordinate transform (microns). + offset_y: Y offset for coordinate transform (microns). + + Returns: + Table with x, y columns shifted to global coordinates. 
+ """ + table = csv_result.table + + if table.num_rows == 0: + return table + + if csv_result.has_x_col: + x_f64 = pc.add( + table.column("x").cast(pa.float64()), + pa.scalar(offset_x, type=pa.float64()), + ) + table = table.set_column( + table.schema.get_field_index("x"), + "x", + float_str_array(x_f64), + ) + if csv_result.has_y_col: + y_f64 = pc.add( + table.column("y").cast(pa.float64()), + pa.scalar(offset_y, type=pa.float64()), + ) + table = table.set_column( + table.schema.get_field_index("y"), + "y", + float_str_array(y_f64), + ) + + return table + + +# --------------------------------------------------------------------------- +# Sopa conflict resolution +# --------------------------------------------------------------------------- + + +def _stitch_sopa_resolve( + metadata: PatchGridMetadata, + geo_results: list[_PatchGeoResult | None], + csv_results: list[_PatchCsvResult | None], + all_geojson_features: list[dict], + all_tables: list[pa.Table], + threshold: float = 0.5, +) -> set[str]: + """Stitch per-patch segmentation using spatial containment assignment. + + 1. Collect ALL non-empty polygons from all patches (no transcript filtering). + 2. Resolve overlapping polygons via sopa's solve_conflicts(). + 3. Assign sequential global cell IDs (cell-1, cell-2, ...). + 4. Spatially assign transcripts to resolved polygons using STRtree. + 5. Noise transcripts (outside all polygons) kept only from their core patch. + + This approach works regardless of whether Baysor's CSV ``cell`` column + matches GeoJSON cell IDs -- all assignment is done by spatial containment. + + Args: + metadata: Grid metadata with patch list. + geo_results: Per-patch GeoJSON results (already in global coords). + csv_results: Per-patch CSV results. + all_geojson_features: Output list to append resolved GeoJSON features. + all_tables: Output list to append processed CSV tables. + threshold: Overlap threshold for sopa's solve_conflicts (0-1). 
+ + Returns: + Set of global cell IDs created by merging overlapping cells. + """ + # --- Phase 1: Collect all polygons from all patches --- + all_polygons: list = [] + patch_indices_list: list[int] = [] + + for i, patch in enumerate(metadata.patches): + geo_result = geo_results[i] + if geo_result is None: + continue + + for feat in geo_result.features: + polygon = shape(feat["geometry"]) + if polygon.is_empty: + continue + if not polygon.is_valid: + polygon = shapely.make_valid(polygon) + if polygon.is_empty: + continue + # make_valid can produce MultiPolygon/GeometryCollection; + # xeniumranger only accepts Polygon, so keep largest component + if polygon.geom_type == "MultiPolygon": + polygon = max(polygon.geoms, key=lambda g: g.area) + elif polygon.geom_type == "GeometryCollection": + polys = [g for g in polygon.geoms if g.geom_type == "Polygon"] + if not polys: + continue + polygon = max(polys, key=lambda g: g.area) + + all_polygons.append(polygon) + patch_indices_list.append(i) + + if not all_polygons: + print("[stitch] No polygons found in any patch") + # Still transform and collect CSVs as noise-only + for i, patch in enumerate(metadata.patches): + csv_result = csv_results[i] + if csv_result is None: + continue + offset_x = patch.global_bounds_um.x_min + offset_y = patch.global_bounds_um.y_min + transformed = _transform_patch_coords(csv_result, offset_x, offset_y) + if transformed.num_rows > 0: + all_tables.append(transformed) + return set() + + # --- Phase 2: Resolve overlapping polygons via sopa --- + patch_idx_array = np.array(patch_indices_list, dtype=np.int64) + input_gdf = gpd.GeoDataFrame(geometry=all_polygons) + resolved_gdf, kept_indices = solve_conflicts( + input_gdf, + threshold=threshold, + patch_indices=patch_idx_array, + return_indices=True, + ) + + # --- Phase 3: Assign global cell IDs to resolved polygons --- + merged_cell_ids: set[str] = set() + kept_arr = np.asarray(kept_indices) + resolved_polys: list = [] + resolved_ids: list[str] = [] + 
+ for rank, orig_idx in enumerate(kept_arr, start=1): + global_id = f"cell-{rank}" + geom = resolved_gdf.geometry.iloc[rank - 1] + + # solve_conflicts union can produce MultiPolygon; keep largest + if geom.geom_type == "MultiPolygon": + geom = max(geom.geoms, key=lambda g: g.area) + elif geom.geom_type == "GeometryCollection": + polys = [g for g in geom.geoms if g.geom_type == "Polygon"] + if not polys: + continue + geom = max(polys, key=lambda g: g.area) + + if orig_idx < 0: + merged_cell_ids.add(global_id) + + resolved_polys.append(geom) + resolved_ids.append(global_id) + + all_geojson_features.append( + { + "type": "Feature", + "id": global_id, + "geometry": mapping(geom), + "properties": {"cell_id": global_id}, + } + ) + + print( + f"[stitch] Resolved {len(all_polygons)} input polygons to " + f"{len(resolved_polys)} cells ({len(merged_cell_ids)} merged)" + ) + + # --- Phase 4: Spatial transcript assignment via STRtree --- + poly_tree = shapely.STRtree(resolved_polys) + + for i, patch in enumerate(metadata.patches): + csv_result = csv_results[i] + if csv_result is None: + continue + + offset_x = patch.global_bounds_um.x_min + offset_y = patch.global_bounds_um.y_min + core = patch.core_bounds_um + + transformed = _transform_patch_coords(csv_result, offset_x, offset_y) + if transformed.num_rows == 0: + continue + + if not csv_result.has_x_col or not csv_result.has_y_col: + all_tables.append(transformed) + continue + + # Get global coordinates for spatial query + gx = transformed.column("x").cast(pa.float64()).to_numpy(zero_copy_only=False) + gy = transformed.column("y").cast(pa.float64()).to_numpy(zero_copy_only=False) + points = shapely.points(gx, gy) + + # Query STRtree: returns (input_indices, tree_indices) + point_hits, poly_hits = poly_tree.query(points, predicate="intersects") + + # Build point -> cell_id mapping (first hit wins) + point_to_cell: dict[int, str] = {} + for pt_idx, poly_idx in zip(point_hits, poly_hits): + if pt_idx not in point_to_cell: + 
point_to_cell[pt_idx] = resolved_ids[poly_idx] + + # Build cell and is_noise columns + n_rows = transformed.num_rows + cell_arr = [""] * n_rows + is_noise_arr = ["true"] * n_rows + for pt_idx, cell_id in point_to_cell.items(): + cell_arr[pt_idx] = cell_id + is_noise_arr[pt_idx] = "false" + + # Filter noise transcripts to core bounds only + # Assigned transcripts are kept from all patches (dedup later by transcript_id) + in_core = ( + (gx >= core.x_min) + & (gx < core.x_max) + & (gy >= core.y_min) + & (gy < core.y_max) + ) + is_assigned = np.array([c != "" for c in cell_arr]) + keep_mask = pa.array(is_assigned | in_core, type=pa.bool_()) + + filtered = transformed.filter(keep_mask) + cell_arr_filtered = [c for c, k in zip(cell_arr, (is_assigned | in_core)) if k] + is_noise_filtered = [ + n for n, k in zip(is_noise_arr, (is_assigned | in_core)) if k + ] + + if filtered.num_rows == 0: + continue + + # Set cell and is_noise columns + cell_idx = ( + filtered.schema.get_field_index("cell") + if "cell" in filtered.column_names + else None + ) + if cell_idx is not None: + filtered = filtered.set_column( + cell_idx, "cell", pa.array(cell_arr_filtered, type=pa.string()) + ) + else: + filtered = filtered.append_column( + "cell", pa.array(cell_arr_filtered, type=pa.string()) + ) + + noise_idx = ( + filtered.schema.get_field_index("is_noise") + if "is_noise" in filtered.column_names + else None + ) + if noise_idx is not None: + filtered = filtered.set_column( + noise_idx, + "is_noise", + pa.array(is_noise_filtered, type=pa.string()), + ) + else: + filtered = filtered.append_column( + "is_noise", pa.array(is_noise_filtered, type=pa.string()) + ) + + all_tables.append(filtered) + + return merged_cell_ids + + +# --------------------------------------------------------------------------- +# Main orchestrator +# --------------------------------------------------------------------------- + + +def stitch_transcript_assignments( + patches_dir: Path, + output_dir: Path, + csv_filename: 
str = "segmentation.csv", + geojson_filename: str = "segmentation_polygons.json", + max_workers: int | None = None, + min_transcripts_per_cell: int = 0, +) -> None: + """Stitch per-patch transcript assignments and polygons into unified output. + + For each patch, reads the transcript assignment CSV and polygon GeoJSON. + Cells are deduplicated using sopa's solve_conflicts() which resolves + overlapping cells at patch boundaries based on area overlap ratio. + + Processing is split into a parallel I/O phase (reading GeoJSON and CSV + files via thread pool) and a sequential phase (dedup, global cell ID + assignment, remapping, and concatenation). + + Args: + patches_dir: Directory containing patch subdirectories and patch_grid.json. + output_dir: Output directory for stitched CSV and GeoJSON. + csv_filename: CSV filename within each patch directory. + geojson_filename: GeoJSON filename within each patch directory. + max_workers: Maximum number of threads for parallel I/O. + """ + patches_dir = Path(patches_dir) + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + metadata = load_grid_metadata(patches_dir / "patch_grid.json") + + n_patches = len(metadata.patches) + if max_workers is None: + max_workers = min(n_patches, os.cpu_count() or 1) + + # ---- Parallel phase: read GeoJSON and CSV files concurrently ---- + with ThreadPoolExecutor(max_workers=max_workers) as executor: + geo_futures = [ + executor.submit( + _read_and_transform_geojson, p, patches_dir, geojson_filename + ) + for p in metadata.patches + ] + csv_futures = [ + executor.submit(_read_patch_csv, p, patches_dir, csv_filename) + for p in metadata.patches + ] + geo_results = [f.result() for f in geo_futures] + csv_results = [f.result() for f in csv_futures] + + # ---- Sequential phase: assign global cell IDs, remap, concatenate ---- + all_tables: list[pa.Table] = [] + all_geojson_features: list[dict] = [] + + _stitch_sopa_resolve( + metadata, + geo_results, + csv_results, + 
all_geojson_features, + all_tables, + threshold=0.5, + ) + + # Concatenate all patch tables + if all_tables: + merged = pa.concat_tables(all_tables) + + # Deduplicate by transcript_id: prefer assigned over noise + if "transcript_id" in merged.column_names: + if "cell" in merged.column_names: + is_noise = pc.equal(merged.column("cell"), "").cast(pa.int8()) + row_order = pa.array(np.arange(merged.num_rows), type=pa.int64()) + sort_table = pa.table({"_noise": is_noise, "_row": row_order}) + sort_indices = pc.sort_indices( + sort_table, + sort_keys=[("_noise", "ascending"), ("_row", "ascending")], + ) + merged = merged.take(sort_indices) + + tid_np = merged.column("transcript_id").to_numpy(zero_copy_only=False) + _, first_indices = np.unique(tid_np, return_index=True) + first_indices.sort() + merged = merged.take(first_indices) + + # Post-stitch cell filter: drop cells below min_transcripts_per_cell + if min_transcripts_per_cell > 0 and "cell" in merged.column_names: + cell_col = merged.column("cell") + cell_counts: dict[str, int] = {} + for c in cell_col.to_pylist(): + if c: + cell_counts[c] = cell_counts.get(c, 0) + 1 + small_cells = { + cid + for cid, cnt in cell_counts.items() + if cnt < min_transcripts_per_cell + } + if small_cells: + # Reassign transcripts from small cells to noise + new_cell = ["" if c in small_cells else c for c in cell_col.to_pylist()] + new_noise = [ + "true" if c in small_cells else n + for c, n in zip( + cell_col.to_pylist(), + merged.column("is_noise").to_pylist() + if "is_noise" in merged.column_names + else ["false"] * merged.num_rows, + ) + ] + cidx = merged.column_names.index("cell") + merged = merged.set_column( + cidx, "cell", pa.array(new_cell, type=pa.string()) + ) + if "is_noise" in merged.column_names: + nidx = merged.column_names.index("is_noise") + merged = merged.set_column( + nidx, "is_noise", pa.array(new_noise, type=pa.string()) + ) + # Remove filtered cells from GeoJSON + all_geojson_features[:] = [ + f + for f in 
all_geojson_features + if str(f.get("id", f.get("properties", {}).get("cell_id", ""))) + not in small_cells + ] + print( + f"[stitch] Filtered {len(small_cells)} cells with " + f"<{min_transcripts_per_cell} transcripts" + ) + + # Log assignment stats + if "cell" in merged.column_names: + cell_vals = merged.column("cell").to_pylist() + n_assigned = sum(1 for c in cell_vals if c) + n_noise = sum(1 for c in cell_vals if not c) + print( + f"[stitch] Final: {merged.num_rows} transcripts, " + f"{n_assigned} assigned, {n_noise} noise" + ) + + # Cast is_noise to integer for xeniumranger compatibility + if "is_noise" in merged.column_names: + noise_col = merged.column("is_noise") + if noise_col.type == pa.string(): + lower = pc.utf8_lower(noise_col) + is_true = pc.or_(pc.equal(lower, "true"), pc.equal(lower, "1")) + idx = merged.column_names.index("is_noise") + merged = merged.set_column(idx, "is_noise", is_true.cast(pa.int8())) + + # Write CSV + if merged.num_rows > 0: + csv_out = output_dir / "xr-transcript-metadata.csv" + pa_csv.write_csv( + merged, + csv_out, + write_options=pa_csv.WriteOptions(quoting_style="needed"), + ) + + # Safety net: remove orphan polygons with zero transcripts + if all_geojson_features and all_tables: + csv_cell_ids: set[str] = set() + if "cell" in merged.column_names: + csv_cell_ids = set(c for c in merged.column("cell").to_pylist() if c) + all_geojson_features = [ + f + for f in all_geojson_features + if str(f.get("id", f.get("properties", {}).get("cell_id", ""))) + in csv_cell_ids + ] + + # Write merged GeoJSON + if all_geojson_features: + merged_geo = {"type": "FeatureCollection", "features": all_geojson_features} + write_geojson(merged_geo, output_dir / "xr-cell-polygons.geojson") + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Stitch per-patch 
Baysor segmentation results into unified output." + ) + parser.add_argument( + "--patches", + type=Path, + required=True, + help="Directory containing patch subdirectories and patch_grid.json", + ) + parser.add_argument( + "--output", + type=Path, + required=True, + help="Output directory for stitched CSV and GeoJSON", + ) + parser.add_argument( + "--csv-filename", + default="segmentation.csv", + help="CSV filename within each patch (default: segmentation.csv)", + ) + parser.add_argument( + "--geojson-filename", + default="segmentation_polygons.json", + help="GeoJSON filename within each patch (default: segmentation_polygons.json)", + ) + parser.add_argument( + "--min-transcripts-per-cell", + type=int, + default=0, + help="Drop cells with fewer transcripts (0 = no filter, default: 0)", + ) + args = parser.parse_args() + + stitch_transcript_assignments( + patches_dir=args.patches, + output_dir=args.output, + csv_filename=args.csv_filename, + geojson_filename=args.geojson_filename, + min_transcripts_per_cell=args.min_transcripts_per_cell, + ) + + +if __name__ == "__main__": + main() diff --git a/bin/utility_convert_mask_uint32.py b/bin/utility_convert_mask_uint32.py new file mode 100755 index 00000000..955ad4b7 --- /dev/null +++ b/bin/utility_convert_mask_uint32.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +""" +Convert a segmentation mask TIFF to uint32 dtype. + +XeniumRanger import-segmentation requires uint32 masks, but upstream +segmenters (e.g. StarDist) often emit int32 labels. This script reads +the input mask, casts it to uint32, and writes the result. +""" + +import argparse + +import numpy as np +import tifffile + + +def convert_mask_to_uint32(input_path: str, output_path: str) -> None: + """ + Read a mask TIFF, cast to uint32, and write to output_path. + + Args: + input_path: Path to input mask TIFF (any integer dtype). + output_path: Path where the uint32 mask will be written. 
+ """ + mask = tifffile.imread(input_path) + print(f"Input dtype: {mask.dtype}, shape: {mask.shape}, labels: {mask.max()}") + tifffile.imwrite(output_path, mask.astype(np.uint32)) + print("Output dtype: uint32") + + +def parse_args() -> argparse.Namespace: + """Parse command-line arguments.""" + parser = argparse.ArgumentParser( + description="Convert a segmentation mask TIFF to uint32 dtype." + ) + parser.add_argument( + "--input", required=True, help="Path to input mask TIFF" + ) + parser.add_argument( + "--output", required=True, help="Path where uint32 mask will be written" + ) + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + convert_mask_to_uint32(input_path=args.input, output_path=args.output) diff --git a/bin/utility_downscale_morphology.py b/bin/utility_downscale_morphology.py new file mode 100755 index 00000000..8544ecf3 --- /dev/null +++ b/bin/utility_downscale_morphology.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 +""" +Pre-downscale a morphology image for Cellpose. + +Reduces image dimensions by a scale factor so that Cellpose's internal +rescaling (diam_mean / diameter) does not exceed GPU/CPU memory. The +scale factor defaults to diameter / diam_mean (e.g., 9 / 30 = 0.3). +After downscaling, Cellpose should run with --diameter equal to +diam_mean (no further internal rescaling). + +Outputs: + {prefix}/downscaled.tif - Downscaled image at the same dtype as input. + {prefix}/scale_info.json - Scale factor and original/new dimensions. +""" + +import argparse +import json +from pathlib import Path + +import tifffile +from skimage.transform import resize + +# Cellpose network requires a minimum spatial size of 256 px. +MIN_DIM = 256 + + +def downscale_image( + image_path: str, diameter: float, diam_mean: float, prefix: str +) -> None: + """ + Downscale image so Cellpose can run with diameter == diam_mean. + + Args: + image_path: Path to morphology TIFF (2D, 3D, or 4D). 
+ diameter: Target object diameter (used to compute scale). + diam_mean: Cellpose model's mean diameter assumption. + prefix: Output directory. + """ + scale = min(diameter / diam_mean, 1.0) # clamp to prevent upscaling + + img = tifffile.imread(image_path) + print(f"Original: {img.shape}, dtype={img.dtype}, ndim={img.ndim}") + + # Handle multichannel OME-TIFFs: shape can be (H, W), (C, H, W), or (Z, C, H, W) + if img.ndim == 2: + orig_h, orig_w = img.shape + new_h = max(int(orig_h * scale), MIN_DIM) + new_w = max(int(orig_w * scale), MIN_DIM) + output_shape = (new_h, new_w) + elif img.ndim == 3: + orig_h, orig_w = img.shape[1], img.shape[2] + new_h = max(int(orig_h * scale), MIN_DIM) + new_w = max(int(orig_w * scale), MIN_DIM) + output_shape = (img.shape[0], new_h, new_w) + else: + orig_h, orig_w = img.shape[-2], img.shape[-1] + new_h = max(int(orig_h * scale), MIN_DIM) + new_w = max(int(orig_w * scale), MIN_DIM) + output_shape = img.shape[:-2] + (new_h, new_w) + + print(f"Downscaling by {scale:.3f}: ({orig_h}, {orig_w}) -> ({new_h}, {new_w})") + + img_ds = resize(img, output_shape, order=3, preserve_range=True, anti_aliasing=True) + img_ds = img_ds.astype(img.dtype) + + out_dir = Path(prefix) + out_dir.mkdir(parents=True, exist_ok=True) + tifffile.imwrite(str(out_dir / "downscaled.tif"), img_ds, compression="zlib") + + info = { + "scale": scale, + "orig_h": orig_h, + "orig_w": orig_w, + "new_h": new_h, + "new_w": new_w, + "diameter": diameter, + "diam_mean": diam_mean, + } + with open(out_dir / "scale_info.json", "w") as f: + json.dump(info, f) + print(f"Done: downscaled.tif written, shape={img_ds.shape}") + + +def parse_args() -> argparse.Namespace: + """Parse command-line arguments.""" + parser = argparse.ArgumentParser( + description="Pre-downscale a morphology image for Cellpose." 
+ ) + parser.add_argument("--image", required=True, help="Morphology TIFF input") + parser.add_argument("--diameter", type=float, required=True, help="Target object diameter") + parser.add_argument("--diam-mean", type=float, required=True, help="Cellpose model diam_mean") + parser.add_argument("--prefix", required=True, help="Output directory") + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + downscale_image( + image_path=args.image, + diameter=args.diameter, + diam_mean=args.diam_mean, + prefix=args.prefix, + ) diff --git a/bin/utility_extract_dapi.py b/bin/utility_extract_dapi.py new file mode 100755 index 00000000..3d60f563 --- /dev/null +++ b/bin/utility_extract_dapi.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +""" +Extract a single channel (e.g., DAPI) from a multi-channel OME-TIFF. + +Xenium morphology_focus.ome.tif has multiple channels (DAPI, boundary, +interior). Single-channel segmenters such as StarDist 2D_versatile_fluo +expect one channel as input. This script reads the input image, slices +the requested channel, and writes the result. +""" + +import argparse + +import tifffile + + +def extract_channel(input_path: str, output_path: str, channel_index: int) -> None: + """ + Read an OME-TIFF, extract a single channel, and write the result. + + Args: + input_path: Path to multi-channel OME-TIFF morphology image. + output_path: Path where the single-channel TIFF will be written. + channel_index: Index of the channel to extract. + """ + img = tifffile.imread(input_path) + orig_shape = img.shape + + if img.ndim == 3: + img = img[channel_index] + elif img.ndim == 4: + img = img[0, channel_index] + + tifffile.imwrite(output_path, img) + print(f"Input shape: {orig_shape} -> extracted channel {channel_index}: {img.shape}") + + +def parse_args() -> argparse.Namespace: + """Parse command-line arguments.""" + parser = argparse.ArgumentParser( + description="Extract a single channel from a multi-channel OME-TIFF." 
+ ) + parser.add_argument( + "--input", required=True, help="Path to multi-channel OME-TIFF morphology image" + ) + parser.add_argument( + "--output", required=True, help="Path where the single-channel TIFF will be written" + ) + parser.add_argument( + "--channel-index", type=int, default=0, help="Channel index to extract (default: 0)" + ) + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + extract_channel( + input_path=args.input, + output_path=args.output, + channel_index=args.channel_index, + ) diff --git a/bin/utility_extract_preview_data.py b/bin/utility_extract_preview_data.py new file mode 100755 index 00000000..0ea737c2 --- /dev/null +++ b/bin/utility_extract_preview_data.py @@ -0,0 +1,208 @@ +#!/usr/bin/env python3 +""" +Extract preview data from Baysor preview HTML reports. + +Parses embedded Vega-Lite spec variables and base64 PNG images from the +Baysor preview.html file, writing MultiQC-compatible TSV and PNG files. +""" + +import argparse +import base64 +import html +import json +import re +import sys +from pathlib import Path +from typing import Dict, List, Optional, Tuple + +import pandas as pd +from bs4 import BeautifulSoup + + +def get_png_files(soup: BeautifulSoup, outdir: Path) -> None: + """Get png base64 images following specific h1 tags in preview.html""" + target_ids = ["Transcript_Plots", "Noise_Level"] + outdir.mkdir(parents=True, exist_ok=True) + + for h1_id in target_ids: + h1_tag = soup.find("h1", id=h1_id) + if not h1_tag: + print(f"[WARN] No

with id {h1_id} found") + continue + + # Look for the first after the h1 in the DOM + img_tag = h1_tag.find_next("img") + if not img_tag or not img_tag.get("src"): + print(f"[WARN] No found after h1#{h1_id}") + continue + + img_src = img_tag["src"] + if img_src.startswith("data:image/png;base64,"): + base64_data = img_src.split(",", 1)[1] + data = base64.b64decode(base64_data) + else: + print(f"[WARN] img src is not base64 PNG for h1#{h1_id}") + continue + + # save png files with _mqc suffix for MultiQC integration + img_name = f"{h1_id}_mqc.png".lower() + out_path = outdir / img_name + with open(out_path, "wb") as f: + f.write(data) + + print(f"[INFO] Saved {img_name}") + + return None + + +def extract_js_object(text: str, start_idx: int) -> Tuple[Optional[str], int]: + """Extract json-like object starting at start_idx.""" + if start_idx >= len(text) or text[start_idx] != "{": + return None, start_idx + + stack, in_str, escape, quote = [], False, False, None + for i in range(start_idx, len(text)): + ch = text[i] + if in_str: + if escape: + escape = False + elif ch == "\\": + escape = True + elif ch == quote: + in_str = False + else: + if ch in ('"', "'"): + in_str, quote = True, ch + elif ch == "{": + stack.append("{") + elif ch == "}": + stack.pop() + if not stack: + return text[start_idx : i + 1], i + 1 + elif ch == "/" and i + 1 < len(text): + # skip js comments + nxt = text[i + 1] + if nxt == "/": + end = text.find("\n", i + 2) + i = len(text) - 1 if end == -1 else end + elif nxt == "*": + end = text.find("*/", i + 2) + if end == -1: + break + i = end + 1 + + return None, start_idx + + +def js_to_json(js: str) -> str: + """Convert a JS object string to valid JSON.""" + # Remove comments + js = re.sub(r"/\*.*?\*/", "", js, flags=re.S) + js = re.sub(r"//[^\n]*", "", js) + + # Convert single-quoted strings to double-quoted strings + js = re.sub( + r"'((?:\\.|[^'\\])*)'", + lambda m: '"' + m.group(1).replace('"', '\\"') + '"', + js, + ) + + # Remove trailing 
commas + js = re.sub(r",\s*(?=[}\]])", "", js) + js = re.sub(r",\s*,+", ",", js) + + return js.strip() + + +def find_variables(script_text: str) -> Dict[str, str]: + """Find all 'var|let|const specN =' declarations and extract their objects.""" + specs: Dict[str, str] = {} + script_text = html.unescape(script_text) + pattern = re.compile(r"(?:var|let|const)\s+(spec\d+)\s*=\s*{", re.I) + + for match in pattern.finditer(script_text): + var = match.group(1) + obj, _ = extract_js_object(script_text, match.end() - 1) + if obj: + specs[var] = obj + else: + print(f"[WARN] Could not extract object for {var}") + return specs + + +def write_tsvs(specs: Dict[str, str], outdir: Path) -> List[Path]: + """Convert extracted json to tsv.""" + outdir.mkdir(parents=True, exist_ok=True) + written: List[Path] = [] + + for var, js_obj in specs.items(): + try: + data = json.loads(js_to_json(js_obj)) + values = data.get("data", {}).get("values", []) + if not values: + print(f"[WARN] No data.values found in {var}") + continue + + df = pd.DataFrame(values) + outpath = outdir / f"{var}_mqc.tsv" + + with open(outpath, "w") as f: + f.write("# plot_type: linegraph\n") + f.write(f"# section_name: {var}\n") + f.write("# description: Extracted preview data\n") + df.to_csv(f, sep="\t", index=False) + + written.append(outpath) + print(f"[INFO] Wrote {outpath} ({len(df)} rows × {len(df.columns)} cols)") + except Exception as e: + print(f"[ERROR] Failed to process {var}: {e}") + + return written + + +def parse_args() -> argparse.Namespace: + """Parse command-line arguments.""" + parser = argparse.ArgumentParser( + description="Extract preview data from Baysor preview HTML reports." 
+ ) + parser.add_argument( + "--preview-html", + required=True, + help="Path to Baysor preview HTML file", + ) + parser.add_argument( + "--prefix", + required=True, + help="Output directory prefix (sample ID)", + ) + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + + input_path: Path = Path(args.preview_html) + outdir: Path = Path(args.prefix) + + text = input_path.read_text(encoding="utf-8", errors="ignore") + soup = BeautifulSoup(text, "html.parser") + + # get the script section + if " argparse.Namespace: + """Parse command-line arguments.""" + parser = argparse.ArgumentParser( + description="Get transcript coordinate bounds from a Parquet file." + ) + parser.add_argument( + "--transcripts", + required=True, + help="Path to transcripts parquet file", + ) + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + result = get_coordinates(args.transcripts) + print(",".join(str(v) for v in result)) diff --git a/bin/utility_parquet_to_csv.py b/bin/utility_parquet_to_csv.py new file mode 100755 index 00000000..bfa19c40 --- /dev/null +++ b/bin/utility_parquet_to_csv.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 +""" +Convert a Parquet file to CSV format. + +Reads a Parquet file and writes it as CSV, optionally gzip-compressed. +""" + +import argparse +from pathlib import Path + +import pandas as pd + + +def convert_parquet( + transcripts: str, + extension: str = ".csv", + prefix: str = "", +) -> None: + """ + Convert a Parquet file to CSV or CSV.GZ format. 
+ + Args: + transcripts: Filename of the input parquet file + extension: Output extension ('.csv' or '.gz' for gzip) + prefix: Output directory prefix + """ + df = pd.read_parquet(transcripts, engine="pyarrow") + + Path(prefix).mkdir(parents=True, exist_ok=True) + + if extension == ".gz": + output = transcripts.replace(".parquet", ".csv.gz") + df.to_csv(f"{prefix}/{output}", compression="gzip", index=False) + else: + output = transcripts.replace(".parquet", ".csv") + df.to_csv(f"{prefix}/{output}", index=False) + + return None + + +def parse_args() -> argparse.Namespace: + """Parse command-line arguments.""" + parser = argparse.ArgumentParser( + description="Convert a Parquet file to CSV format." + ) + parser.add_argument( + "--transcripts", + required=True, + help="Input parquet filename", + ) + parser.add_argument( + "--extension", + default=".csv", + help="Output extension: '.csv' or '.gz' (default: .csv)", + ) + parser.add_argument( + "--prefix", + required=True, + help="Output directory prefix (sample ID)", + ) + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + convert_parquet( + transcripts=args.transcripts, + extension=args.extension, + prefix=args.prefix, + ) diff --git a/bin/utility_resize_tif.py b/bin/utility_resize_tif.py new file mode 100755 index 00000000..6cca640d --- /dev/null +++ b/bin/utility_resize_tif.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python3 +""" +Resize a segmentation TIFF mask to match transcript coordinates. + +This script rescales a segmentation mask image to match the coordinate +space of Xenium transcript data using microns-per-pixel metadata. 
+""" + +import argparse +import json +import os +from typing import Tuple + +import numpy as np +import pandas as pd +import tifffile +from skimage.transform import resize + + +def read_mask(mask_path: str) -> np.ndarray: + """Read the segmentation mask from a TIFF file.""" + print(f"Reading mask: {mask_path}") + mask = tifffile.imread(mask_path) + print(f"Mask shape: {mask.shape}, dtype: {mask.dtype}") + return mask + + +def read_transcript_bounds(transcript_path: str) -> Tuple[float, float, float, float]: + """Read transcript coordinates and return their bounding box.""" + print(f"Reading transcripts: {transcript_path}") + if transcript_path.endswith(".parquet"): + transcripts = pd.read_parquet(transcript_path, columns=["x_location", "y_location"]) + else: + transcripts = pd.read_csv(transcript_path) + + if "x_location" not in transcripts.columns or "y_location" not in transcripts.columns: + raise ValueError("Transcript file must contain 'x_location' and 'y_location' columns.") + + x_min, x_max = transcripts["x_location"].min(), transcripts["x_location"].max() + y_min, y_max = transcripts["y_location"].min(), transcripts["y_location"].max() + + print(f"Transcript bounds: X=({x_min:.2f}, {x_max:.2f}), Y=({y_min:.2f}, {y_max:.2f})") + return x_min, x_max, y_min, y_max + + +def read_microns_per_pixel(metadata_path: str) -> float: + """Extract microns_per_pixel or pixel_size from metadata JSON.""" + print(f"Reading metadata: {metadata_path}") + with open(metadata_path, "r") as f: + metadata = json.load(f) + + mpp = metadata.get("microns_per_pixel") or metadata.get("pixel_size") + if mpp is None: + raise KeyError("Metadata JSON must contain 'microns_per_pixel' or 'pixel_size'.") + + print(f"Microns per pixel: {mpp}") + return float(mpp) + + +def compute_target_size( + x_min: float, x_max: float, y_min: float, y_max: float, microns_per_pixel: float +) -> Tuple[int, int]: + """Compute new image size (in pixels) to cover given coordinates.""" + new_width = 
int(round((x_max - x_min) / microns_per_pixel)) + new_height = int(round((y_max - y_min) / microns_per_pixel)) + print(f"Target image size: {new_width} x {new_height} pixels") + return new_height, new_width + + +def resize_mask(mask: np.ndarray, new_shape: Tuple[int, int]) -> np.ndarray: + """Resize mask using nearest-neighbor interpolation (preserve labels).""" + print("Resizing mask...") + resized = resize( + mask, + new_shape, + order=0, # nearest neighbor to preserve segmentation labels + preserve_range=True, + anti_aliasing=False, + ).astype(mask.dtype) + print(f"Resized shape: {resized.shape}") + return resized + + +def main(mask_path: str, transcripts_path: str, metadata_path: str, output_path: str) -> None: + """Resize segmentation mask to match Xenium coordinate space.""" + # Validate input files + for path in [mask_path, transcripts_path, metadata_path]: + if not os.path.exists(path): + raise FileNotFoundError(f"File not found: {path}") + + # Load data + mask = read_mask(mask_path) + x_min, x_max, y_min, y_max = read_transcript_bounds(transcripts_path) + microns_per_pixel = read_microns_per_pixel(metadata_path) + + # Compute physical mask size + height, width = mask.shape + print(f"Original mask size: {width * microns_per_pixel:.2f} x {height * microns_per_pixel:.2f} um") + + # Compute target size + new_height, new_width = compute_target_size(x_min, x_max, y_min, y_max, microns_per_pixel) + + # Resize and save + resized_mask = resize_mask(mask, (new_height, new_width)) + tifffile.imwrite(output_path, resized_mask) + + print(f"Saved resized mask -> {output_path}") + + +def parse_args() -> argparse.Namespace: + """Parse command-line arguments.""" + parser = argparse.ArgumentParser( + description="Resize a segmentation TIFF mask to match transcript coordinates." 
+ ) + parser.add_argument("--mask", required=True, help="Path to segmentation mask TIFF") + parser.add_argument("--transcripts", required=True, help="Path to transcripts file") + parser.add_argument("--metadata", required=True, help="Path to metadata JSON") + parser.add_argument("--prefix", required=True, help="Output directory prefix") + parser.add_argument("--mask-filename", required=True, help="Original mask filename for output naming") + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + + os.makedirs(args.prefix, exist_ok=True) + output_mask: str = os.path.join(args.prefix, f"resized_{args.mask_filename}.tif") + + main( + mask_path=args.mask, + transcripts_path=args.transcripts, + metadata_path=args.metadata, + output_path=output_mask, + ) diff --git a/bin/utility_segger2xr.py b/bin/utility_segger2xr.py new file mode 100755 index 00000000..22889e82 --- /dev/null +++ b/bin/utility_segger2xr.py @@ -0,0 +1,247 @@ +#!/usr/bin/env python3 +""" +Convert Segger prediction output to XeniumRanger-compatible format. + +Reads Segger PREDICT output (transcripts.parquet with segger_cell_id), +produces Baysor-format segmentation CSV, refined transcripts parquet, +and GeoJSON cell boundary polygons for xeniumranger import-segmentation. +""" + +import argparse +import json +from pathlib import Path +from typing import List + +import pandas as pd +from scipy.spatial import ConvexHull + +# Expected columns in transcripts.parquet +REQUIRED_COLUMNS: List[str] = [ + "transcript_id", + "cell_id", + "overlaps_nucleus", + "feature_name", + "x_location", + "y_location", + "z_location", + "qv", +] + +# Column name for segger cell assignment (varies by segger version) +SEGGER_ID_CANDIDATES: List[str] = ["segger_cell_id", "segger_id"] + + +def refine_transcripts(parquet_path: str) -> pd.DataFrame: + """ + Read segger PREDICT output and extract cell assignments. + Supports both 'segger_cell_id' (newer) and 'segger_id' (older) column names. 
+ """ + parquet_file = Path(parquet_path) + if not parquet_file.exists(): + raise FileNotFoundError(f"File not found: {parquet_path}") + + df = pd.read_parquet(parquet_file, engine="pyarrow") + + missing_cols = [col for col in REQUIRED_COLUMNS if col not in df.columns] + if missing_cols: + raise ValueError(f"Missing required columns: {missing_cols}") + + # Find segger cell assignment column + segger_col = None + for candidate in SEGGER_ID_CANDIDATES: + if candidate in df.columns: + segger_col = candidate + break + if segger_col is None: + raise ValueError( + f"No segger cell assignment column found. " + f"Expected one of {SEGGER_ID_CANDIDATES}, got columns: {list(df.columns)}" + ) + + # Replace cell_id with segger assignment + cell_id_index = df.columns.get_loc("cell_id") + df = df.drop(columns=["cell_id"]) + segger_series = df.pop(segger_col) + df.insert(cell_id_index, "cell_id", segger_series) + + return df + + +def build_cell_map(df: pd.DataFrame, min_transcripts: int = 3) -> dict: + """ + Build a mapping from raw segger cell IDs to non-numeric string IDs. + + Only includes cells that have: + - >= min_transcripts assigned transcripts + - At least one transcript with valid (non-NaN) x/y coordinates + + Cell IDs use "cell-N" format (hyphen + integer) as required by + xeniumranger's cell ID parser. Non-numeric to avoid polars Int64 inference. 
+ """ + cell_ids = df["cell_id"].fillna("UNASSIGNED").astype(str) + is_unassigned = (cell_ids == "UNASSIGNED") | (cell_ids == "") | (cell_ids == "0") + assigned = cell_ids[~is_unassigned] + counts = assigned.value_counts() + enough_tx = set(counts[counts >= min_transcripts].index) + + # Exclude cells with all-NaN coordinates (no spatial info = useless) + has_coords = df.dropna(subset=["x_location", "y_location"]) + has_coords_ids = set(has_coords["cell_id"].fillna("UNASSIGNED").astype(str)) + valid_cells = sorted(enough_tx & has_coords_ids) + + return {cell: f"cell-{i + 1}" for i, cell in enumerate(valid_cells)} + + +def to_baysor_csv(df: pd.DataFrame, output_path: str, cell_map: dict) -> None: + """ + Convert transcript DataFrame to Baysor-compatible CSV format. + + xeniumranger 4.0 import-segmentation --transcript-assignment expects a + Baysor segmentation CSV with at minimum: transcript_id, cell, is_noise, + x, y columns. This function maps Xenium/Segger columns to Baysor format. + """ + baysor_df = pd.DataFrame() + baysor_df["transcript_id"] = df["transcript_id"] + baysor_df["x"] = df["x_location"] + baysor_df["y"] = df["y_location"] + baysor_df["z"] = df["z_location"] + baysor_df["gene"] = df["feature_name"] + + cell_ids = df["cell_id"].fillna("UNASSIGNED").astype(str) + is_unassigned = (cell_ids == "UNASSIGNED") | (cell_ids == "") | (cell_ids == "0") + baysor_df["cell"] = cell_ids.map(cell_map).fillna("") + baysor_df["is_noise"] = is_unassigned.astype(int) + + baysor_df.to_csv(output_path, index=False) + + n_assigned = (~is_unassigned).sum() + n_noise = is_unassigned.sum() + n_cells = len(cell_map) + print( + f"Baysor CSV: {n_assigned} assigned, {n_noise} noise, {n_cells} cells -> {output_path}" + ) + + +def _make_buffer_polygon(cx: float, cy: float, radius: float = 0.5) -> list: + """Create a small square polygon around a centroid as fallback.""" + return [ + [cx - radius, cy - radius], + [cx + radius, cy - radius], + [cx + radius, cy + radius], + [cx - 
radius, cy + radius], + [cx - radius, cy - radius], # close ring + ] + + +def generate_viz_polygons(df: pd.DataFrame, output_path: str, cell_map: dict) -> None: + """ + Generate a GeoJSON file with cell boundary polygons. + + Uses ConvexHull when possible; falls back to a small buffer polygon around + the centroid for cells with < 3 unique points or collinear points. + + Required by xeniumranger import-segmentation when using --transcript-assignment. + Each feature MUST have a top-level "id" field (xeniumranger reads item["id"]). + Cell IDs must match those in the Baysor CSV. + """ + assigned = df[ + df["cell_id"].notna() + & (df["cell_id"].astype(str) != "UNASSIGNED") + & (df["cell_id"].astype(str) != "") + ].copy() + + features = [] + grouped = assigned.groupby("cell_id") + + for cell_id, group in grouped: + mapped_id = cell_map.get(str(cell_id)) + if mapped_id is None: + continue + + coords = group[["x_location", "y_location"]].dropna().values + + polygon_coords = None + if len(coords) >= 3: + try: + hull = ConvexHull(coords) + hull_points = coords[hull.vertices].tolist() + hull_points.append(hull_points[0]) # close polygon ring + polygon_coords = hull_points + except Exception: + pass + + # Fallback: buffer polygon around centroid + if polygon_coords is None: + cx, cy = coords.mean(axis=0).astype(float) + polygon_coords = _make_buffer_polygon(cx, cy) + + features.append( + { + "type": "Feature", + "id": mapped_id, + "geometry": { + "type": "Polygon", + "coordinates": [polygon_coords], + }, + "properties": {"cell_id": mapped_id}, + } + ) + + geojson = {"type": "FeatureCollection", "features": features} + + with open(output_path, "w") as f: + json.dump(geojson, f) + + print(f"Generated {len(features)} cell polygons in {output_path}") + + +def main(input_file: str, prefix: str, min_transcripts: int = 3) -> None: + """Run the full segger-to-xeniumranger conversion pipeline.""" + Path(prefix).mkdir(parents=True, exist_ok=True) + transcripts = 
refine_transcripts(input_file) + + # Build cell ID mapping, filtering cells with < min_transcripts + cell_map = build_cell_map(transcripts, min_transcripts=min_transcripts) + + # xeniumranger 4.0 expects Baysor-format CSV (not parquet) with is_noise column + to_baysor_csv(transcripts, f"{prefix}/segmentation.csv", cell_map) + + # Also save the refined parquet for downstream use + transcripts.to_parquet(f"{prefix}/transcripts.parquet", engine="pyarrow") + + # Generate cell boundary polygons (required companion to --transcript-assignment) + # Uses ConvexHull when possible; falls back to buffer polygon for edge cases + generate_viz_polygons(transcripts, f"{prefix}/segmentation_polygons.json", cell_map) + + +def parse_args() -> argparse.Namespace: + """Parse command-line arguments.""" + parser = argparse.ArgumentParser( + description="Convert Segger prediction output to XeniumRanger-compatible format." + ) + parser.add_argument( + "--transcripts", + required=True, + help="Path to Segger output transcripts parquet file", + ) + parser.add_argument( + "--prefix", + required=True, + help="Output directory prefix (sample ID)", + ) + parser.add_argument( + "--min-transcripts", + type=int, + default=3, + help="Minimum transcripts per cell (default: 3)", + ) + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + main( + input_file=args.transcripts, + prefix=args.prefix, + min_transcripts=args.min_transcripts, + ) diff --git a/bin/utility_split_transcripts.py b/bin/utility_split_transcripts.py new file mode 100755 index 00000000..275fbab1 --- /dev/null +++ b/bin/utility_split_transcripts.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python3 +""" +Split transcript coordinates into spatial tiles. + +Reads a Xenium transcripts.parquet file and computes quantile-based spatial +tiles, writing a splits.csv with tile boundaries. 
+""" + +import argparse +import os +from typing import List + +import pandas as pd + + +def compute_quantile_ranges(df: pd.DataFrame, col: str, n_bins: int) -> List: + """ + Compute the bin edges for `df[col]` such that each of the n_bins + has ~equal count of points. Returns a list of (min, max) tuples. + """ + _, bins = pd.qcut(df[col], q=n_bins, retbins=True, duplicates="drop") + + ranges = [(bins[i], bins[i + 1]) for i in range(len(bins) - 1)] + + return ranges + + +def make_tiles(df: pd.DataFrame, x_bins: int, y_bins: int) -> pd.DataFrame: + """ + Produce a DataFrame with one row per tile: + tile_id, x_min, x_max, y_min, y_max + """ + x_ranges = compute_quantile_ranges(df, "x_location", x_bins) + y_ranges = compute_quantile_ranges(df, "y_location", y_bins) + + tiles = [] + for ix, (x_min, x_max) in enumerate(x_ranges, start=1): + for iy, (y_min, y_max) in enumerate(y_ranges, start=1): + tiles.append( + { + "tile_id": f"{ix}_{iy}", + "x_min": x_min, + "x_max": x_max, + "y_min": y_min, + "y_max": y_max, + } + ) + + return pd.DataFrame(tiles) + + +def main( + transcripts: str, + x_bins: int = 10, + y_bins: int = 10, + prefix: str = "", +) -> None: + """Generate spatial tile splits from transcript coordinates.""" + # read parquet file + df = pd.read_parquet(transcripts, engine="fastparquet") + + # compute tiles + tiles_df = make_tiles(df, x_bins, y_bins) + + # save csv file + os.makedirs(prefix, exist_ok=True) + tiles_df.to_csv(f"{prefix}/splits.csv", index=False) + + return None + + +def parse_args() -> argparse.Namespace: + """Parse command-line arguments.""" + parser = argparse.ArgumentParser( + description="Split transcript coordinates into spatial tiles." 
+ ) + parser.add_argument( + "--transcripts", + required=True, + help="Path to transcripts parquet file", + ) + parser.add_argument( + "--x-bins", + type=int, + required=True, + help="Number of bins along X axis", + ) + parser.add_argument( + "--y-bins", + type=int, + required=True, + help="Number of bins along Y axis", + ) + parser.add_argument( + "--prefix", + required=True, + help="Output directory prefix", + ) + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + main( + transcripts=args.transcripts, + x_bins=args.x_bins, + y_bins=args.y_bins, + prefix=args.prefix, + ) diff --git a/bin/utility_upscale_mask.py b/bin/utility_upscale_mask.py new file mode 100755 index 00000000..6cc1694e --- /dev/null +++ b/bin/utility_upscale_mask.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 +""" +Restore Cellpose masks to original image resolution. + +Uses nearest-neighbor interpolation to upscale segmentation masks back +to the original image dimensions recorded in scale_info.json (produced +by downscale_morphology.py). + +Output: {prefix}/upscaled_{mask_basename}.tif +""" + +import argparse +import json +from pathlib import Path + +import numpy as np +import tifffile +from PIL import Image + + +def upscale_mask(mask_path: str, scale_info_path: str, prefix: str) -> None: + """ + Read a downscaled mask and upscale it to original dimensions. + + Args: + mask_path: Path to downscaled segmentation mask TIFF. + scale_info_path: Path to scale_info.json from downscale_morphology. + prefix: Output directory. 
+ """ + with open(scale_info_path) as f: + info = json.load(f) + orig_h, orig_w = info["orig_h"], info["orig_w"] + + mask = tifffile.imread(mask_path) + print( + f"Mask: {mask.shape}, dtype={mask.dtype}, " + f"unique cells: {len(np.unique(mask)) - 1}" + ) + print(f"Upscaling to ({orig_h}, {orig_w})") + + pil_mask = Image.fromarray(mask) + pil_mask = pil_mask.resize((orig_w, orig_h), Image.NEAREST) + mask_up = np.array(pil_mask, dtype=mask.dtype) + + out_dir = Path(prefix) + out_dir.mkdir(parents=True, exist_ok=True) + base = Path(mask_path).stem + out_name = out_dir / f"upscaled_{base}.tif" + tifffile.imwrite(str(out_name), mask_up, compression="zlib") + print( + f"Done: {out_name}, unique cells: {len(np.unique(mask_up)) - 1}" + ) + + +def parse_args() -> argparse.Namespace: + """Parse command-line arguments.""" + parser = argparse.ArgumentParser( + description="Upscale a Cellpose mask back to original resolution." + ) + parser.add_argument("--mask", required=True, help="Downscaled mask TIFF") + parser.add_argument("--scale-info", required=True, help="scale_info.json from downscale step") + parser.add_argument("--prefix", required=True, help="Output directory") + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + upscale_mask( + mask_path=args.mask, + scale_info_path=args.scale_info, + prefix=args.prefix, + ) diff --git a/bin/xenium_patch_stitch_postprocess.py b/bin/xenium_patch_stitch_postprocess.py new file mode 100755 index 00000000..7144b1ac --- /dev/null +++ b/bin/xenium_patch_stitch_postprocess.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python3 +""" +Post-process stitched per-patch segmentation outputs. + +Ensures every GeoJSON feature is a single Polygon: make_valid() and +sopa.solve_conflicts() can produce MultiPolygon, MultiLineString, or +GeometryCollection geometries that XeniumRanger rejects. Cells dropped +during cleanup are also reassigned to UNASSIGNED in the transcript CSV +so the two outputs stay consistent. 
+""" + +import argparse +import csv +import json + +import shapely +from shapely.geometry import mapping, shape + + +def clean_geojson(geojson_path: str) -> set: + """ + Force every feature to a single valid Polygon. + + Returns the set of cell ids whose features were dropped. + """ + with open(geojson_path) as f: + data = json.load(f) + + clean = [] + dropped_cells = set() + for feat in data["features"]: + geom = shape(feat["geometry"]) + if not geom.is_valid: + geom = shapely.make_valid(geom) + poly = None + if geom.geom_type == "Polygon": + poly = geom + elif geom.geom_type == "MultiPolygon": + poly = max(geom.geoms, key=lambda g: g.area) + elif geom.geom_type == "GeometryCollection": + polys = [g for g in geom.geoms if g.geom_type == "Polygon"] + if polys: + poly = max(polys, key=lambda g: g.area) + if poly is not None and not poly.is_empty: + feat["geometry"] = mapping(poly) + clean.append(feat) + else: + cell_id = feat.get("id") or feat.get("properties", {}).get("cell_id", "") + dropped_cells.add(str(cell_id)) + + print(f"GeoJSON: {len(clean)} kept, {len(dropped_cells)} dropped: {dropped_cells}") + data["features"] = clean + with open(geojson_path, "w") as f: + json.dump(data, f) + + return dropped_cells + + +def reassign_dropped(csv_path: str, dropped_cells: set) -> None: + """ + Reassign transcripts of dropped cells to UNASSIGNED in the CSV. 
+ """ + if not dropped_cells: + return + + with open(csv_path) as f: + reader = csv.DictReader(f) + fieldnames = reader.fieldnames + rows = list(reader) + + reassigned = 0 + for row in rows: + if row["cell"] in dropped_cells: + row["cell"] = "" + row["is_noise"] = "1" + reassigned += 1 + + with open(csv_path, "w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=fieldnames) + writer.writeheader() + writer.writerows(rows) + print(f"CSV: {reassigned} transcripts reassigned to UNASSIGNED") + + +def parse_args() -> argparse.Namespace: + """Parse command-line arguments.""" + parser = argparse.ArgumentParser( + description="Clean stitched GeoJSON polygons and reconcile transcript CSV." + ) + parser.add_argument("--geojson", required=True, help="Path to xr-cell-polygons.geojson") + parser.add_argument("--csv", required=True, help="Path to xr-transcript-metadata.csv") + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + dropped = clean_geojson(args.geojson) + reassign_dropped(args.csv, dropped) diff --git a/bin/xenium_patch_stitch_transcripts.py b/bin/xenium_patch_stitch_transcripts.py new file mode 100755 index 00000000..d9fb8d41 --- /dev/null +++ b/bin/xenium_patch_stitch_transcripts.py @@ -0,0 +1,808 @@ +#!/usr/bin/env python3 +"""Stitch per-patch Baysor segmentation results into unified output. + +Standalone script that replaces the xenium_patch CLI package's stitch +functionality. Uses sopa's solve_conflicts() for overlap resolution. 
+""" + +from __future__ import annotations + +import argparse +import json +import os +from concurrent.futures import ThreadPoolExecutor +from dataclasses import dataclass +from pathlib import Path + +import geopandas as gpd +import numpy as np +import pyarrow as pa +import pyarrow.compute as pc +import pyarrow.csv as pa_csv +import shapely +from shapely.affinity import translate +from shapely.geometry import mapping, shape +from sopa.segmentation.resolve import solve_conflicts + +# --------------------------------------------------------------------------- +# Geometry helpers +# --------------------------------------------------------------------------- + + +def _ensure_polygon(geom) -> "shapely.Polygon | None": + """Extract a single Polygon from any geometry, or return None. + + XeniumRanger only accepts Polygon. make_valid() and solve_conflicts + can produce MultiPolygon, GeometryCollection, MultiLineString, etc. + """ + if geom is None or geom.is_empty: + return None + if geom.geom_type == "Polygon": + return geom + if geom.geom_type == "MultiPolygon": + return max(geom.geoms, key=lambda g: g.area) + if geom.geom_type == "GeometryCollection": + polys = [g for g in geom.geoms if g.geom_type == "Polygon"] + return max(polys, key=lambda g: g.area) if polys else None + # LineString, MultiLineString, Point, etc. 
— not a polygon + return None + + +# --------------------------------------------------------------------------- +# Inline types (from _types.py) +# --------------------------------------------------------------------------- + + +@dataclass(frozen=True) +class Bounds: + """Axis-aligned bounding box in either pixel or micron coordinates.""" + + x_min: float + x_max: float + y_min: float + y_max: float + + +@dataclass(frozen=True) +class PatchInfo: + """Metadata for a single patch in the grid.""" + + patch_id: str + row: int + col: int + global_bounds_px: Bounds + global_bounds_um: Bounds + core_bounds_px: Bounds + core_bounds_um: Bounds + + +@dataclass +class PatchGridMetadata: + """Full grid metadata, serializable to JSON.""" + + version: str + bundle_path: str + image_height_px: int + image_width_px: int + pixel_size_um: float + transcript_extent_um: Bounds + grid_rows: int + grid_cols: int + overlap_um: float + overlap_px: int + patches: list[PatchInfo] + grid_type: str = "uniform" + + +# --------------------------------------------------------------------------- +# Internal result containers +# --------------------------------------------------------------------------- + + +@dataclass +class _PatchGeoResult: + """Result of parallel GeoJSON processing for a single patch.""" + + features: list[dict] + cell_ids: list[str] + + +@dataclass +class _PatchCsvResult: + """Result of parallel CSV reading for a single patch.""" + + table: pa.Table + has_cell_col: bool + has_x_col: bool + has_y_col: bool + has_gene_col: bool = False + has_feature_name_col: bool = False + + +# --------------------------------------------------------------------------- +# Grid metadata I/O (from grid.py) +# --------------------------------------------------------------------------- + + +def _dict_to_bounds(d: dict) -> Bounds: + return Bounds(d["x_min"], d["x_max"], d["y_min"], d["y_max"]) + + +def load_grid_metadata(input_path: Path) -> PatchGridMetadata: + """Deserialize PatchGridMetadata 
from JSON. + + Args: + input_path: Path to JSON file to read. + + Returns: + Reconstructed PatchGridMetadata. + """ + with open(input_path) as f: + data = json.load(f) + + patches = [ + PatchInfo( + patch_id=p["patch_id"], + row=p["row"], + col=p["col"], + global_bounds_px=_dict_to_bounds(p["global_bounds_px"]), + global_bounds_um=_dict_to_bounds(p["global_bounds_um"]), + core_bounds_px=_dict_to_bounds(p["core_bounds_px"]), + core_bounds_um=_dict_to_bounds(p["core_bounds_um"]), + ) + for p in data["patches"] + ] + + return PatchGridMetadata( + version=data["version"], + bundle_path=data["bundle_path"], + image_height_px=data["image_height_px"], + image_width_px=data["image_width_px"], + pixel_size_um=data["pixel_size_um"], + transcript_extent_um=_dict_to_bounds(data["transcript_extent_um"]), + grid_rows=data["grid_rows"], + grid_cols=data["grid_cols"], + overlap_um=data["overlap_um"], + overlap_px=data["overlap_px"], + grid_type=data.get("grid_type", "uniform"), + patches=patches, + ) + + +# --------------------------------------------------------------------------- +# GeoJSON I/O (from polygon_io.py) +# --------------------------------------------------------------------------- + + +def _normalize_geometry_collection(geojson: dict) -> dict: + """Convert a GeometryCollection to a FeatureCollection. + + proseg-to-baysor produces a non-standard GeoJSON GeometryCollection where + each geometry object has a custom ``cell`` key (bare integer) instead of + using Feature wrappers. This normalises it to a standard FeatureCollection + with ``id`` and ``properties.cell_id`` on each feature, using the + ``"cell-{N}"`` format that matches the companion CSV. + + Args: + geojson: Parsed GeoJSON dict with type GeometryCollection. + + Returns: + Standard FeatureCollection dict. 
+ """ + features = [] + for geom in geojson.get("geometries", []): + cell_raw = geom.get("cell", "") + cell_id = str(cell_raw) + clean_geom = {k: v for k, v in geom.items() if k != "cell"} + feature = { + "type": "Feature", + "id": cell_id, + "geometry": clean_geom, + "properties": {"cell_id": cell_id}, + } + features.append(feature) + return {"type": "FeatureCollection", "features": features} + + +def read_geojson(geojson_path: Path) -> dict: + """Read a GeoJSON file and normalise to FeatureCollection. + + Handles both standard FeatureCollections and the GeometryCollection + format produced by proseg-to-baysor. + + Args: + geojson_path: Path to the GeoJSON file. + + Returns: + Parsed GeoJSON dict (always a FeatureCollection). + """ + with open(geojson_path) as f: + data = json.load(f) + if data.get("type") == "GeometryCollection": + return _normalize_geometry_collection(data) + return data + + +def transform_polygons(geojson: dict, offset_x: float, offset_y: float) -> dict: + """Shift all polygon coordinates by (offset_x, offset_y). + + Args: + geojson: Input FeatureCollection. + offset_x: Translation in x. + offset_y: Translation in y. + + Returns: + New FeatureCollection with shifted geometries. + """ + features = [] + for feat in geojson.get("features", []): + geom = shape(feat["geometry"]) + shifted = translate(geom, xoff=offset_x, yoff=offset_y) + new_feat = {**feat, "geometry": mapping(shifted)} + features.append(new_feat) + return {"type": "FeatureCollection", "features": features} + + +def write_geojson(geojson: dict, output_path: Path) -> None: + """Write a GeoJSON FeatureCollection. + + Args: + geojson: GeoJSON dict to write. + output_path: Destination path (parent dirs created automatically). 
+ """ + output_path.parent.mkdir(parents=True, exist_ok=True) + with open(output_path, "w") as f: + json.dump(geojson, f) + + +# --------------------------------------------------------------------------- +# Arrow utilities (from _arrow_utils.py) +# --------------------------------------------------------------------------- + + +def float_str_array(f64_array: pa.Array) -> pa.Array: + """Convert a float64 pyarrow array to string using Python's str(float) format. + + pyarrow's built-in cast omits trailing '.0' for whole numbers. This + function ensures output matches str(float(...)) for CSV compatibility. + + Args: + f64_array: Float64 pyarrow array to convert. + + Returns: + String pyarrow array with Python-formatted float values. + """ + return pa.array( + [str(v) if v is not None else None for v in f64_array.to_pylist()], + type=pa.string(), + ) + + +# --------------------------------------------------------------------------- +# Parallel I/O +# --------------------------------------------------------------------------- + + +def _read_and_transform_geojson( + patch: PatchInfo, + patches_dir: Path, + geojson_filename: str, +) -> _PatchGeoResult | None: + """Read, transform GeoJSON for a single patch (no core clipping). + + Args: + patch: Patch metadata. + patches_dir: Root patches directory. + geojson_filename: GeoJSON filename within each patch directory. + + Returns: + _PatchGeoResult with features and cell IDs, or None if no GeoJSON. 
+ """ + geojson_path = patches_dir / patch.patch_id / geojson_filename + if not geojson_path.exists(): + return None + + geojson = read_geojson(geojson_path) + + offset_x = patch.global_bounds_um.x_min + offset_y = patch.global_bounds_um.y_min + geojson = transform_polygons(geojson, offset_x, offset_y) + + features = geojson.get("features", []) + seen: set[str] = set() + cell_ids: list[str] = [] + for feat in features: + old_id = str(feat.get("id", feat.get("properties", {}).get("cell_id", ""))) + if old_id not in seen: + seen.add(old_id) + cell_ids.append(old_id) + + return _PatchGeoResult(features=features, cell_ids=cell_ids) + + +def _read_patch_csv( + patch: PatchInfo, + patches_dir: Path, + csv_filename: str, +) -> _PatchCsvResult | None: + """Read a patch CSV into a pyarrow Table. + + All columns are read as strings to preserve exact formatting. + + Args: + patch: Patch metadata. + patches_dir: Root patches directory. + csv_filename: CSV filename within each patch directory. + + Returns: + _PatchCsvResult with the table and column presence flags, or None. 
+ """ + csv_path = patches_dir / patch.patch_id / csv_filename + if not csv_path.exists(): + return None + + with open(csv_path) as fh: + header_line = fh.readline().strip() + col_names = header_line.split(",") + all_string_types = {name: pa.string() for name in col_names} + + table = pa_csv.read_csv( + csv_path, + convert_options=pa_csv.ConvertOptions( + column_types=all_string_types, + strings_can_be_null=False, + ), + read_options=pa_csv.ReadOptions(use_threads=True), + ) + + return _PatchCsvResult( + table=table, + has_cell_col="cell" in table.column_names, + has_x_col="x" in table.column_names, + has_y_col="y" in table.column_names, + has_gene_col="gene" in table.column_names, + has_feature_name_col="feature_name" in table.column_names, + ) + + +# --------------------------------------------------------------------------- +# CSV processing +# --------------------------------------------------------------------------- + + +def _transform_patch_coords( + csv_result: _PatchCsvResult, + offset_x: float, + offset_y: float, +) -> pa.Table: + """Shift transcript coordinates from local patch space to global space. + + Args: + csv_result: The raw CSV table and column flags. + offset_x: X offset for coordinate transform (microns). + offset_y: Y offset for coordinate transform (microns). + + Returns: + Table with x, y columns shifted to global coordinates. 
+ """ + table = csv_result.table + + if table.num_rows == 0: + return table + + if csv_result.has_x_col: + x_f64 = pc.add( + table.column("x").cast(pa.float64()), + pa.scalar(offset_x, type=pa.float64()), + ) + table = table.set_column( + table.schema.get_field_index("x"), + "x", + float_str_array(x_f64), + ) + if csv_result.has_y_col: + y_f64 = pc.add( + table.column("y").cast(pa.float64()), + pa.scalar(offset_y, type=pa.float64()), + ) + table = table.set_column( + table.schema.get_field_index("y"), + "y", + float_str_array(y_f64), + ) + + return table + + +# --------------------------------------------------------------------------- +# Sopa conflict resolution +# --------------------------------------------------------------------------- + + +def _stitch_sopa_resolve( + metadata: PatchGridMetadata, + geo_results: list[_PatchGeoResult | None], + csv_results: list[_PatchCsvResult | None], + all_geojson_features: list[dict], + all_tables: list[pa.Table], + threshold: float = 0.5, +) -> set[str]: + """Stitch per-patch segmentation using spatial containment assignment. + + 1. Collect ALL non-empty polygons from all patches (no transcript filtering). + 2. Resolve overlapping polygons via sopa's solve_conflicts(). + 3. Assign sequential global cell IDs (cell-1, cell-2, ...). + 4. Spatially assign transcripts to resolved polygons using STRtree. + 5. Noise transcripts (outside all polygons) kept only from their core patch. + + This approach works regardless of whether Baysor's CSV ``cell`` column + matches GeoJSON cell IDs -- all assignment is done by spatial containment. + + Args: + metadata: Grid metadata with patch list. + geo_results: Per-patch GeoJSON results (already in global coords). + csv_results: Per-patch CSV results. + all_geojson_features: Output list to append resolved GeoJSON features. + all_tables: Output list to append processed CSV tables. + threshold: Overlap threshold for sopa's solve_conflicts (0-1). 
+ + Returns: + Set of global cell IDs created by merging overlapping cells. + """ + # --- Phase 1: Collect all polygons from all patches --- + all_polygons: list = [] + patch_indices_list: list[int] = [] + + for i, patch in enumerate(metadata.patches): + geo_result = geo_results[i] + if geo_result is None: + continue + + for feat in geo_result.features: + polygon = shape(feat["geometry"]) + if polygon.is_empty: + continue + if not polygon.is_valid: + polygon = shapely.make_valid(polygon) + # Ensure we have a single Polygon (xeniumranger rejects all else) + polygon = _ensure_polygon(polygon) + if polygon is None: + continue + + all_polygons.append(polygon) + patch_indices_list.append(i) + + if not all_polygons: + print("[stitch] No polygons found in any patch") + # Still transform and collect CSVs as noise-only + for i, patch in enumerate(metadata.patches): + csv_result = csv_results[i] + if csv_result is None: + continue + offset_x = patch.global_bounds_um.x_min + offset_y = patch.global_bounds_um.y_min + transformed = _transform_patch_coords(csv_result, offset_x, offset_y) + if transformed.num_rows > 0: + all_tables.append(transformed) + return set() + + # --- Phase 2: Resolve overlapping polygons via sopa --- + patch_idx_array = np.array(patch_indices_list, dtype=np.int64) + input_gdf = gpd.GeoDataFrame(geometry=all_polygons) + resolved_gdf, kept_indices = solve_conflicts( + input_gdf, + threshold=threshold, + patch_indices=patch_idx_array, + return_indices=True, + ) + + # --- Phase 3: Assign global cell IDs to resolved polygons --- + merged_cell_ids: set[str] = set() + kept_arr = np.asarray(kept_indices) + resolved_polys: list = [] + resolved_ids: list[str] = [] + + for rank, orig_idx in enumerate(kept_arr, start=1): + global_id = f"cell-{rank}" + geom = resolved_gdf.geometry.iloc[rank - 1] + + # Ensure single Polygon after solve_conflicts union + geom = _ensure_polygon(geom) + if geom is None: + continue + + if orig_idx < 0: + merged_cell_ids.add(global_id) + + 
resolved_polys.append(geom) + resolved_ids.append(global_id) + + all_geojson_features.append( + { + "type": "Feature", + "id": global_id, + "geometry": mapping(geom), + "properties": {"cell_id": global_id}, + } + ) + + print( + f"[stitch] Resolved {len(all_polygons)} input polygons to " + f"{len(resolved_polys)} cells ({len(merged_cell_ids)} merged)" + ) + + # --- Phase 4: Spatial transcript assignment via STRtree --- + poly_tree = shapely.STRtree(resolved_polys) + + for i, patch in enumerate(metadata.patches): + csv_result = csv_results[i] + if csv_result is None: + continue + + offset_x = patch.global_bounds_um.x_min + offset_y = patch.global_bounds_um.y_min + core = patch.core_bounds_um + + transformed = _transform_patch_coords(csv_result, offset_x, offset_y) + if transformed.num_rows == 0: + continue + + if not csv_result.has_x_col or not csv_result.has_y_col: + all_tables.append(transformed) + continue + + # Get global coordinates for spatial query + gx = transformed.column("x").cast(pa.float64()).to_numpy(zero_copy_only=False) + gy = transformed.column("y").cast(pa.float64()).to_numpy(zero_copy_only=False) + points = shapely.points(gx, gy) + + # Query STRtree: returns (input_indices, tree_indices) + point_hits, poly_hits = poly_tree.query(points, predicate="intersects") + + # Build point -> cell_id mapping (first hit wins) + point_to_cell: dict[int, str] = {} + for pt_idx, poly_idx in zip(point_hits, poly_hits): + if pt_idx not in point_to_cell: + point_to_cell[pt_idx] = resolved_ids[poly_idx] + + # Build cell and is_noise columns + n_rows = transformed.num_rows + cell_arr = [""] * n_rows + is_noise_arr = ["true"] * n_rows + for pt_idx, cell_id in point_to_cell.items(): + cell_arr[pt_idx] = cell_id + is_noise_arr[pt_idx] = "false" + + # Filter noise transcripts to core bounds only + # Assigned transcripts are kept from all patches (dedup later by transcript_id) + in_core = ( + (gx >= core.x_min) + & (gx < core.x_max) + & (gy >= core.y_min) + & (gy < 
core.y_max) + ) + is_assigned = np.array([c != "" for c in cell_arr]) + keep_mask = pa.array(is_assigned | in_core, type=pa.bool_()) + + filtered = transformed.filter(keep_mask) + cell_arr_filtered = [c for c, k in zip(cell_arr, (is_assigned | in_core)) if k] + is_noise_filtered = [ + n for n, k in zip(is_noise_arr, (is_assigned | in_core)) if k + ] + + if filtered.num_rows == 0: + continue + + # Set cell and is_noise columns + cell_idx = ( + filtered.schema.get_field_index("cell") + if "cell" in filtered.column_names + else None + ) + if cell_idx is not None: + filtered = filtered.set_column( + cell_idx, "cell", pa.array(cell_arr_filtered, type=pa.string()) + ) + else: + filtered = filtered.append_column( + "cell", pa.array(cell_arr_filtered, type=pa.string()) + ) + + noise_idx = ( + filtered.schema.get_field_index("is_noise") + if "is_noise" in filtered.column_names + else None + ) + if noise_idx is not None: + filtered = filtered.set_column( + noise_idx, + "is_noise", + pa.array(is_noise_filtered, type=pa.string()), + ) + else: + filtered = filtered.append_column( + "is_noise", pa.array(is_noise_filtered, type=pa.string()) + ) + + all_tables.append(filtered) + + return merged_cell_ids + + +# --------------------------------------------------------------------------- +# Main orchestrator +# --------------------------------------------------------------------------- + + +def stitch_transcript_assignments( + patches_dir: Path, + output_dir: Path, + csv_filename: str = "segmentation.csv", + geojson_filename: str = "segmentation_polygons.json", + max_workers: int | None = None, +) -> None: + """Stitch per-patch transcript assignments and polygons into unified output. + + For each patch, reads the transcript assignment CSV and polygon GeoJSON. + Cells are deduplicated using sopa's solve_conflicts() which resolves + overlapping cells at patch boundaries based on area overlap ratio. 
+ + Processing is split into a parallel I/O phase (reading GeoJSON and CSV + files via thread pool) and a sequential phase (dedup, global cell ID + assignment, remapping, and concatenation). + + Args: + patches_dir: Directory containing patch subdirectories and patch_grid.json. + output_dir: Output directory for stitched CSV and GeoJSON. + csv_filename: CSV filename within each patch directory. + geojson_filename: GeoJSON filename within each patch directory. + max_workers: Maximum number of threads for parallel I/O. + """ + patches_dir = Path(patches_dir) + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + metadata = load_grid_metadata(patches_dir / "patch_grid.json") + + n_patches = len(metadata.patches) + if max_workers is None: + max_workers = min(n_patches, os.cpu_count() or 1) + + # ---- Parallel phase: read GeoJSON and CSV files concurrently ---- + with ThreadPoolExecutor(max_workers=max_workers) as executor: + geo_futures = [ + executor.submit( + _read_and_transform_geojson, p, patches_dir, geojson_filename + ) + for p in metadata.patches + ] + csv_futures = [ + executor.submit(_read_patch_csv, p, patches_dir, csv_filename) + for p in metadata.patches + ] + geo_results = [f.result() for f in geo_futures] + csv_results = [f.result() for f in csv_futures] + + # ---- Sequential phase: assign global cell IDs, remap, concatenate ---- + all_tables: list[pa.Table] = [] + all_geojson_features: list[dict] = [] + + _stitch_sopa_resolve( + metadata, + geo_results, + csv_results, + all_geojson_features, + all_tables, + threshold=0.5, + ) + + # Concatenate all patch tables + if all_tables: + merged = pa.concat_tables(all_tables) + + # Deduplicate by transcript_id: prefer assigned over noise + if "transcript_id" in merged.column_names: + if "cell" in merged.column_names: + is_noise = pc.equal(merged.column("cell"), "").cast(pa.int8()) + row_order = pa.array(np.arange(merged.num_rows), type=pa.int64()) + sort_table = pa.table({"_noise": 
is_noise, "_row": row_order}) + sort_indices = pc.sort_indices( + sort_table, + sort_keys=[("_noise", "ascending"), ("_row", "ascending")], + ) + merged = merged.take(sort_indices) + + tid_np = merged.column("transcript_id").to_numpy(zero_copy_only=False) + _, first_indices = np.unique(tid_np, return_index=True) + first_indices.sort() + merged = merged.take(first_indices) + + # Log assignment stats + if "cell" in merged.column_names: + cell_vals = merged.column("cell").to_pylist() + n_assigned = sum(1 for c in cell_vals if c) + n_noise = sum(1 for c in cell_vals if not c) + print( + f"[stitch] Final: {merged.num_rows} transcripts, " + f"{n_assigned} assigned, {n_noise} noise" + ) + + # Cast is_noise to integer for xeniumranger compatibility + if "is_noise" in merged.column_names: + noise_col = merged.column("is_noise") + if noise_col.type == pa.string(): + lower = pc.utf8_lower(noise_col) + is_true = pc.or_(pc.equal(lower, "true"), pc.equal(lower, "1")) + idx = merged.column_names.index("is_noise") + merged = merged.set_column(idx, "is_noise", is_true.cast(pa.int8())) + + # Write CSV + if merged.num_rows > 0: + csv_out = output_dir / "xr-transcript-metadata.csv" + pa_csv.write_csv( + merged, + csv_out, + write_options=pa_csv.WriteOptions(quoting_style="needed"), + ) + + # Safety net: remove orphan polygons with zero transcripts + if all_geojson_features and all_tables: + csv_cell_ids: set[str] = set() + if "cell" in merged.column_names: + csv_cell_ids = set(c for c in merged.column("cell").to_pylist() if c) + all_geojson_features = [ + f + for f in all_geojson_features + if str(f.get("id", f.get("properties", {}).get("cell_id", ""))) + in csv_cell_ids + ] + + # Write merged GeoJSON + if all_geojson_features: + merged_geo = {"type": "FeatureCollection", "features": all_geojson_features} + write_geojson(merged_geo, output_dir / "xr-cell-polygons.geojson") + + +# --------------------------------------------------------------------------- +# CLI +# 
--------------------------------------------------------------------------- + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Stitch per-patch Baysor segmentation results into unified output." + ) + parser.add_argument( + "--patches", + type=Path, + required=True, + help="Directory containing patch subdirectories and patch_grid.json", + ) + parser.add_argument( + "--output", + type=Path, + required=True, + help="Output directory for stitched CSV and GeoJSON", + ) + parser.add_argument( + "--csv-filename", + default="segmentation.csv", + help="CSV filename within each patch (default: segmentation.csv)", + ) + parser.add_argument( + "--geojson-filename", + default="segmentation_polygons.json", + help="GeoJSON filename within each patch (default: segmentation_polygons.json)", + ) + args = parser.parse_args() + + stitch_transcript_assignments( + patches_dir=args.patches, + output_dir=args.output, + csv_filename=args.csv_filename, + geojson_filename=args.geojson_filename, + ) + + +if __name__ == "__main__": + main() diff --git a/conf/base.config b/conf/base.config index 187a154e..476c4dbe 100644 --- a/conf/base.config +++ b/conf/base.config @@ -1,6 +1,6 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - nf-core/spatialxe Nextflow base config file + nf-core/spatialaxe Nextflow base config file ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ A 'blank slate' config file, appropriate for general use on most high performance compute environments. Assumes that all software is installed and available on @@ -10,57 +10,94 @@ process { - // TODO nf-core: Check the defaults for all processes - cpus = { 1 * task.attempt } - memory = { 6.GB * task.attempt } - time = { 4.h * task.attempt } + cpus = { 1 * task.attempt } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } - errorStrategy = { task.exitStatus in ((130..145) + 104 + (175..177)) ? 
'retry' : 'finish' } - maxRetries = 1 + // resourceLimits = [ cpus: 192, memory: 750.GB, time: 72.h ] + + // Retry signal-induced exits and "killed without exit code" cases: + // 130..145 = signal exits (SIGINT=130, SIGKILL=137, SIGTERM=143, etc.) + // 104 = ECONNRESET (transient network failures during stage-in/out) + // 2147483647 = Integer.MAX_VALUE, Nextflow's sentinel for tasks that died + // before writing .exitcode (Nextflow surfaces this as + // "terminated for an unknown reason -- Likely it has been + // terminated by the external system"). Common on AWS Batch + // spot capacity, kubernetes preemption, and grid-scheduler + // cancellations. See nextflow docs/aws.md for the AWS case. + errorStrategy = { task.exitStatus in ((130..145) + 104 + 2147483647) ? 'retry' : 'finish' } + maxRetries = 3 maxErrors = '-1' - // Process-specific resource requirements - // NOTE - Please try and reuse the labels below as much as possible. - // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. - // If possible, it would be nice to keep the same label naming convention when - // adding in your local modules too. - // TODO nf-core: Customise requirements for specific processes. 
- // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors + // ========================================================================= + // Standard nf-core CPU labels + // ========================================================================= + withLabel:process_single { - cpus = { 1 } + cpus = { 1 } memory = { 6.GB * task.attempt } - time = { 4.h * task.attempt } + time = { 4.h * task.attempt } } + withLabel:process_low { - cpus = { 2 * task.attempt } + cpus = { 2 * task.attempt } memory = { 12.GB * task.attempt } - time = { 4.h * task.attempt } + time = { 4.h * task.attempt } } + withLabel:process_medium { - cpus = { 6 * task.attempt } - memory = { 36.GB * task.attempt } - time = { 8.h * task.attempt } + cpus = { 6 * task.attempt } + memory = { 42.GB * task.attempt } + time = { 8.h * task.attempt } } + withLabel:process_high { - cpus = { 12 * task.attempt } + cpus = { 12 * task.attempt } memory = { 72.GB * task.attempt } - time = { 16.h * task.attempt } + time = { 16.h * task.attempt } } + + withLabel:process_xl { + cpus = { 30 * task.attempt } + memory = { 240.GB * task.attempt } + time = { 24.h * task.attempt } + } + withLabel:process_long { - time = { 20.h * task.attempt } + time = { 20.h * task.attempt } } + withLabel:process_high_memory { memory = { 200.GB * task.attempt } } + withLabel:error_ignore { errorStrategy = 'ignore' } + withLabel:error_retry { errorStrategy = 'retry' maxRetries = 2 } - withLabel: process_gpu { - ext.use_gpu = { workflow.profile.contains('gpu') } - accelerator = { workflow.profile.contains('gpu') ? 1 : null } + + // ========================================================================= + // GPU labels + // ========================================================================= + + // Multi-GPU processes (e.g., Segger train/predict) + withLabel:process_gpu { + ext.use_gpu = { params.use_gpu } + accelerator = { params.use_gpu ? 
1 : null } + // containerOptions = { "--shm-size ${task.memory.toGiga().intValue()}g" } + } + + // Single-GPU processes (e.g., Cellpose, StarDist) + withLabel:process_gpu_single { + ext.use_gpu = { params.use_gpu } + accelerator = { params.use_gpu ? 1 : null } + cpus = { 12 * task.attempt } + memory = { 72.GB * task.attempt } + time = { 16.h * task.attempt } } + } diff --git a/conf/containers_conda_lock_files_amd64.config b/conf/containers_conda_lock_files_amd64.config deleted file mode 100644 index d3ee1b4e..00000000 --- a/conf/containers_conda_lock_files_amd64.config +++ /dev/null @@ -1,2 +0,0 @@ -process { withName: 'FASTQC' { container = 'modules/nf-core/fastqc/.conda-lock/linux_amd64-bd-5cb1a2fa2f18c7c2_1.txt' } } -process { withName: 'MULTIQC' { container = 'modules/nf-core/multiqc/.conda-lock/linux_amd64-bd-db7c73dae76bc9e6_1.txt' } } diff --git a/conf/containers_conda_lock_files_arm64.config b/conf/containers_conda_lock_files_arm64.config deleted file mode 100644 index 2b90ac4f..00000000 --- a/conf/containers_conda_lock_files_arm64.config +++ /dev/null @@ -1,2 +0,0 @@ -process { withName: 'FASTQC' { container = 'modules/nf-core/fastqc/.conda-lock/linux_arm64-bd-e455e32f745abe68_1.txt' } } -process { withName: 'MULTIQC' { container = 'modules/nf-core/multiqc/.conda-lock/linux_arm64-bd-d167b8012595a136_1.txt' } } diff --git a/conf/containers_docker_amd64.config b/conf/containers_docker_amd64.config deleted file mode 100644 index 65f1814a..00000000 --- a/conf/containers_docker_amd64.config +++ /dev/null @@ -1,2 +0,0 @@ -process { withName: 'FASTQC' { container = 'community.wave.seqera.io/library/fastqc:0.12.1--5cb1a2fa2f18c7c2' } } -process { withName: 'MULTIQC' { container = 'community.wave.seqera.io/library/multiqc:1.34--db7c73dae76bc9e6' } } diff --git a/conf/containers_docker_arm64.config b/conf/containers_docker_arm64.config deleted file mode 100644 index 6c845ba4..00000000 --- a/conf/containers_docker_arm64.config +++ /dev/null @@ -1,2 +0,0 @@ -process { 
withName: 'FASTQC' { container = 'community.wave.seqera.io/library/fastqc:0.12.1--e455e32f745abe68' } } -process { withName: 'MULTIQC' { container = 'community.wave.seqera.io/library/multiqc:1.34--d167b8012595a136' } } diff --git a/conf/containers_singularity_https_amd64.config b/conf/containers_singularity_https_amd64.config deleted file mode 100644 index 838f2484..00000000 --- a/conf/containers_singularity_https_amd64.config +++ /dev/null @@ -1,2 +0,0 @@ -process { withName: 'FASTQC' { container = 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f2/f20b021476d1d87658820f971ebecc1e8cdbde0f338eb0d9cea2b0a8fc54a54b/data' } } -process { withName: 'MULTIQC' { container = 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/1b/1bef8af6be88c5733461959c46ac8ef73d18f65277f62a1695d0e1633054f9c2/data' } } diff --git a/conf/containers_singularity_https_arm64.config b/conf/containers_singularity_https_arm64.config deleted file mode 100644 index 090173be..00000000 --- a/conf/containers_singularity_https_arm64.config +++ /dev/null @@ -1,2 +0,0 @@ -process { withName: 'FASTQC' { container = 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46daf2dad0169afd2ae047c3e50ed3776259f664bf07e5e06b045dc23449e994/data' } } -process { withName: 'MULTIQC' { container = 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/9a/9a1fec9662a152683e6fcae440d0ce20920b3b89dc62d1e3a52e73f92eba0969/data' } } diff --git a/conf/containers_singularity_oras_amd64.config b/conf/containers_singularity_oras_amd64.config deleted file mode 100644 index 773f3698..00000000 --- a/conf/containers_singularity_oras_amd64.config +++ /dev/null @@ -1,2 +0,0 @@ -process { withName: 'FASTQC' { container = 'oras://community.wave.seqera.io/library/fastqc:0.12.1--5c4bd442468d75dd' } } -process { withName: 'MULTIQC' { container = 'oras://community.wave.seqera.io/library/multiqc:1.34--4fc8657c816047c0' } } diff --git 
a/conf/containers_singularity_oras_arm64.config b/conf/containers_singularity_oras_arm64.config deleted file mode 100644 index 798cc638..00000000 --- a/conf/containers_singularity_oras_arm64.config +++ /dev/null @@ -1,2 +0,0 @@ -process { withName: 'FASTQC' { container = 'oras://community.wave.seqera.io/library/fastqc:0.12.1--127a87fc06499035' } } -process { withName: 'MULTIQC' { container = 'oras://community.wave.seqera.io/library/multiqc:1.34--7fbd82d945c06726' } } diff --git a/conf/igenomes.config b/conf/igenomes.config deleted file mode 100644 index 3f114377..00000000 --- a/conf/igenomes.config +++ /dev/null @@ -1,440 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for iGenomes paths -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines reference genomes using iGenome paths. - Can be used by any config that customises the base path using: - $params.igenomes_base / --igenomes_base ----------------------------------------------------------------------------------------- -*/ - -params { - // illumina iGenomes reference file paths - genomes { - 'GRCh37' { - fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "2.7e9" - blacklist = 
"${projectDir}/assets/blacklists/GRCh37-blacklist.bed" - } - 'GRCh38' { - fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" - } - 'CHM13' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" - bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" - gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf" - gff = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.gz" - mito_name = "chrM" - } - 'GRCm38' { - fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" - readme = 
"${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "1.87e9" - blacklist = "${projectDir}/assets/blacklists/GRCm38-blacklist.bed" - } - 'TAIR10' { - fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" - mito_name = "Mt" - } - 'EB2' { - fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/README.txt" - } - 'UMD3.1' { - fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = 
"${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" - mito_name = "MT" - } - 'WBcel235' { - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" - mito_name = "MtDNA" - macs_gsize = "9e7" - } - 'CanFam3.1' { - fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BismarkIndex/" - gtf = 
"${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" - mito_name = "MT" - } - 'GRCz10' { - fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'BDGP6' { - fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" - mito_name = "M" - macs_gsize = "1.2e8" - } - 'EquCab2' { - fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = 
"${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" - mito_name = "MT" - } - 'EB1' { - fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" - } - 'Galgal4' { - fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" - bed12 = 
"${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'Gm01' { - fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/README.txt" - } - 'Mmul_1' { - fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" - mito_name = "MT" - } - 'IRGSP-1.0' { - fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" - star = 
"${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" - mito_name = "Mt" - } - 'CHIMP2.1.4' { - fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" - mito_name = "MT" - } - 'Rnor_5.0' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'Rnor_6.0' { - fasta = 
"${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'R64-1-1' { - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" - mito_name = "MT" - macs_gsize = "1.2e7" - } - 'EF2' { - fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BismarkIndex/" - 
gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "1.21e7" - } - 'Sbi1' { - fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/README.txt" - } - 'Sscrofa10.2' { - fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" - mito_name = "MT" - } - 'AGPv3' { - fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" - bwa = 
"${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" - mito_name = "Mt" - } - 'hg38' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" - } - 'hg19' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = 
"${projectDir}/assets/blacklists/hg19-blacklist.bed" - } - 'mm10' { - fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "1.87e9" - blacklist = "${projectDir}/assets/blacklists/mm10-blacklist.bed" - } - 'bosTau8' { - fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" - mito_name = "chrM" - } - 'ce10' { - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" - bismark = 
"${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "9e7" - } - 'canFam3' { - fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" - mito_name = "chrM" - } - 'danRer10' { - fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "1.37e9" - } - 'dm6' { - fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" - bwa = 
"${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "1.2e8" - } - 'equCab2' { - fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" - mito_name = "chrM" - } - 'galGal4' { - fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.gtf" - bed12 = 
"${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" - mito_name = "chrM" - } - 'panTro4' { - fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" - mito_name = "chrM" - } - 'rn6' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" - mito_name = "chrM" - } - 'sacCer3' { - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" - star = 
"${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BismarkIndex/" - readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "1.2e7" - } - 'susScr3' { - fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" - mito_name = "chrM" - } - } -} diff --git a/conf/igenomes_ignored.config b/conf/igenomes_ignored.config deleted file mode 100644 index b4034d82..00000000 --- a/conf/igenomes_ignored.config +++ /dev/null @@ -1,9 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for iGenomes paths -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Empty genomes dictionary to use when igenomes is ignored. 
----------------------------------------------------------------------------------------- -*/ - -params.genomes = [:] diff --git a/conf/modules.config b/conf/modules.config index d203d2b6..845f0df4 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -15,20 +15,370 @@ process { publishDir = [ path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] - withName: FASTQC { - ext.args = '--quiet' + // ---------------------------- multiqc --------------------------------------------------- + + withName: MULTIQC { + ext.args = { params.multiqc_title ? "--title \"${params.multiqc_title}\"" : '' } + publishDir = [ + path: { "${params.outdir}/${params.mode}/multiqc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: MULTIQC_PRE_XR_RUN { + ext.args = { "--title \"${params.multiqc_title ?: 'MultiQC Pre Xeniumranger import-segmentation Run'}\"" } + publishDir = [ + path: { "${params.outdir}/${params.mode}/multiqc/raw_bundle" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: MULTIQC_POST_XR_RUN { + ext.args = { "--title \"${params.multiqc_title ?: 'MultiQC Post Xeniumranger import-segmentation Run'}\"" } + publishDir = [ + path: { "${params.outdir}/${params.mode}/multiqc/redefined_bundle" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] } - withName: 'MULTIQC' { - ext.args = { params.multiqc_title ? 
"--title \"$params.multiqc_title\"" : '' } + + // ---------------------------- xeniumranger --------------------------------------------------- + + // XeniumRanger: must use local scratch for large output bundles + withName:".*XENIUMRANGER.*" { + scratch = true + } + + // scratch=true is set in base.config via withName:".*XENIUMRANGER.*" + withName: XENIUMRANGER_RELABEL { publishDir = [ - path: { "${params.outdir}/multiqc" }, + path: "${params.outdir}/${params.mode}/xeniumranger/relabel", + mode: params.publish_dir_mode, + ] + } + + withName: XENIUMRANGER_RESEGMENT { + publishDir = [ + path: "${params.outdir}/${params.mode}/xeniumranger/resegment", + mode: params.publish_dir_mode, + ] + ext.args = {[ + // Disable boundary/interior stain when the param is falsy; keep tool default when truthy. + !params.boundary_stain ? "--boundary-stain=disable" : "", + !params.interior_stain ? "--interior-stain=disable" : "", + params.expansion_distance != null ? "--expansion-distance=${params.expansion_distance}" : "", + params.dapi_filter != null ? "--dapi-filter=${params.dapi_filter}" : "", + ].join(' ').trim()} + } + + withName: XENIUMRANGER_IMPORT_SEGMENTATION { + publishDir = [ + path: "${params.outdir}/${params.mode}/xeniumranger/import_segementation", + mode: params.publish_dir_mode, + ] + ext.args = {[ + params.expansion_distance != null ? "--expansion-distance=${params.expansion_distance}" : "", + ].join(' ').trim()} + } + + // ---------------------------- proseg --------------------------------------------------- + + withName: PROSEG { + publishDir = [ + path: "${params.outdir}/${params.mode}/proseg/preset", + mode: params.publish_dir_mode, + ] + ext.args = {[ + params.format != null ? 
"--${params.format}" : "", + ].join(' ').trim()} + } + + withName: PROSEG2BAYSOR { + publishDir = [ + path: "${params.outdir}/${params.mode}/proseg/proseg2baysor", + mode: params.publish_dir_mode, + ] + } + + // ---------------------------- baysor --------------------------------------------------- + + withName: BAYSOR_RUN { + memory = { params.baysor_tiling ? 240.GB * task.attempt : 720.GB } + ext.args = "--min-molecules-per-cell ${params.baysor_tiling ? params.baysor_tiling_min_mols_per_cell : 30} --x-column x_location --y-column y_location --z-column z_location --gene-column feature_name" + ext.prior_column = params.baysor_prior == 'cells' ? 'cell_id' : null + ext.prior_confidence = params.baysor_prior != null ? params.baysor_prior_confidence : null + publishDir = [ + path: { "${params.outdir}/${params.mode}/baysor/run" }, + mode: params.publish_dir_mode, + ] + } + + withName: BAYSOR_SEGFREE { + memory = { 720.GB } + publishDir = [ + path: { "${params.outdir}/${params.mode}/baysor/segfree" }, + mode: params.publish_dir_mode, + ] + } + + withName: BAYSOR_CREATE_DATASET { + publishDir = [ + path: { "${params.outdir}/${params.mode}/baysor/create_dataset" }, + mode: params.publish_dir_mode, + ] + } + + withName: BAYSOR_PREPROCESS_TRANSCRIPTS { + publishDir = [ + path: { "${params.outdir}/${params.mode}/baysor/preprocess" }, + mode: params.publish_dir_mode, + ] + } + + withName: BAYSOR_PREVIEW { + memory = { 240.GB * task.attempt } + publishDir = [ + path: { "${params.outdir}/${params.mode}/baysor/preview" }, + mode: params.publish_dir_mode, + ] + } + + // ---------------------------- xenium_patch (tiling) ------------------------------------ + + withName: 'XENIUM_PATCH_DIVIDE' { + ext.tile_width = params.baysor_tiling_micron + ext.overlap = params.baysor_tiling_overlap + ext.balanced = params.baysor_tiling_balanced + publishDir = [ + path: { "${params.outdir}/${meta.id}/xenium_patch" }, + mode: params.publish_dir_mode, + saveAs: { filename -> 
filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'XENIUM_PATCH_STITCH' { + ext.filter_method = params.patch_filter_method ?: null + ext.iqr_multiplier = params.patch_filter_iqr_multiplier + ext.z_threshold = params.patch_filter_z_threshold + ext.args = { "--min-transcripts-per-cell ${params.baysor_tiling_min_transcripts_per_cell}" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/xenium_patch" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } + // ---------------------------- segger --------------------------------------------------- + + withName: SEGGER_CREATE_DATASET { + publishDir = [ + path: { "${params.outdir}/${params.mode}/segger/create_dataset" }, + mode: params.publish_dir_mode, + ] + ext.args = {[ + params.format != null ? "--sample-type ${params.format}" : "", + params.tile_width != null ? "--tile-width ${params.tile_width}" : "", + params.tile_height != null ? "--tile-height ${params.tile_height}" : "", + ].join(' ').trim()} + } + + withName: SEGGER_TRAIN { + publishDir = [ + path: { "${params.outdir}/${params.mode}/segger/train" }, + mode: params.publish_dir_mode, + ] + ext.args = {[ + "--init_emb 8 --hidden_channels 32 --num_tx_tokens 10000 --out_channels 8 --heads 2 --num_mid_layers 2 --strategy auto --precision bf16-mixed", + params.batch_size_train != null ? "--batch_size ${params.batch_size_train}" : "", + params.max_epochs != null ? "--max_epochs ${params.max_epochs}" : "", + params.segger_num_workers != null ? "--num_workers ${params.segger_num_workers}" : "", + ].join(' ').trim()} + } + + withName: SEGGER_PREDICT { + publishDir = [ + path: { "${params.outdir}/${params.mode}/segger/predict" }, + mode: params.publish_dir_mode, + // Skip partitioned parquet dirs (Hive-style) that S3 copy can't handle + saveAs: { filename -> filename.contains('transcripts_df.parquet') ? null : filename }, + ] + ext.args = {[ + params.batch_size_predict != null ? 
"--batch-size ${params.batch_size_predict}" : "", + params.cc_analysis != null ? "--cc-analysis ${params.cc_analysis}" : "", + params.segger_knn_method != null ? "--knn-method ${params.segger_knn_method}" : "", + ].join(' ').trim()} + } + + // ---------------------------- ficture ------------------------------------------ + + withName: FICTURE_PREPROCESS { + publishDir = [ + path: "${params.outdir}/${params.mode}/ficture/preprocess", + mode: params.publish_dir_mode, + ] + ext.args = {"--negative-control-regex '${params.negative_control_regex}'"} + } + + // ---------------------------- utility modules ----------------------------------- + + + withName: UNTAR { + publishDir = [ + path: { "${params.outdir}/${params.mode}/untar/" }, + mode: params.publish_dir_mode, + ] + } + + withName: RESOLIFT { + publishDir = [ + path: { "${params.outdir}/${params.mode}/resolift/" }, + mode: params.publish_dir_mode, + ] + } + + withName: PARQUET_TO_CSV { + publishDir = [ + path: { "${params.outdir}/${params.mode}/utility/parquet_to_csv" }, + mode: params.publish_dir_mode, + ] + } + + withName: EXTRACT_PREVIEW_DATA { + publishDir = [ + path: { "${params.outdir}/${params.mode}/utility/preview_data/" }, + mode: params.publish_dir_mode, + ] + } + + withName: GET_TRANSCRIPTS_COORDINATES { + publishDir = [ + path: { "${params.outdir}/${params.mode}/utility/get_coordinates/" }, + mode: params.publish_dir_mode, + ] + } + + withName: RESIZE_TIF { + publishDir = [ + path: { "${params.outdir}/${params.mode}/utility/resize_tif/" }, + mode: params.publish_dir_mode, + ] + } + + withName: SEGGER2XR { + publishDir = [ + path: { "${params.outdir}/${params.mode}/utility/segger2xr/" }, + mode: params.publish_dir_mode, + ] + } + + withName: SPLIT_TRANSCRIPTS { + publishDir = [ + path: { "${params.outdir}/${params.mode}/utility/split_transcripts/" }, + mode: params.publish_dir_mode, + ] + } + + // ---------------------------- spatialdata -------------------------------------- + + withName: 
SPATIALDATA_WRITE { + publishDir = [ + path: { "${params.outdir}/${params.mode}/spatialdata/write" }, + mode: params.publish_dir_mode, + ] + ext.args = { params.format ? "--format ${params.format}" : '' } + } + + withName: SPATIALDATA_MERGE { + publishDir = [ + path: { "${params.outdir}/${params.mode}/spatialdata/merge" }, + mode: params.publish_dir_mode, + ] + } + + withName: SPATIALDATA_META { + publishDir = [ + path: { "${params.outdir}/${params.mode}/spatialdata/meta" }, + mode: params.publish_dir_mode, + ] + } + + // ---------------------------- cellpose ----------------------------------------- + + // GPU is auto-detected via task.accelerator in the official nf-core cellpose module + withName: CELLPOSE { + publishDir = [ + path: { "${params.outdir}/${params.mode}/cellpose" }, + mode: params.publish_dir_mode, + ] + ext.args = "--flow_threshold 0 --batch_size 1" + } + + withName: CELLPOSE_CELLS { + publishDir = [ + path: { "${params.outdir}/${params.mode}/cellpose_cells" }, + mode: params.publish_dir_mode, + ] + ext.args = "--flow_threshold 0 --batch_size 1" + } + + // ---------------------------- stardist ----------------------------------------- + + withName: '.*STARDIST.*' { + ext.args = {[ + params.stardist_prob_thresh != null ? "--prob_thresh ${params.stardist_prob_thresh}" : "", + params.stardist_nms_thresh != null ? "--nms_thresh ${params.stardist_nms_thresh}" : "", + params.stardist_n_tiles != null ? 
"--n_tiles ${params.stardist_n_tiles}" : "", + ].join(' ').trim()} + } + + withName: 'STARDIST_NUCLEI' { + publishDir = [ + path: { "${params.outdir}/${params.mode}/stardist_nuclei" }, + mode: params.publish_dir_mode, + ] + } + + // StarDist preprocessing/postprocessing utilities + withName: '.*EXTRACT_DAPI.*' { + publishDir = [ + path: { "${params.outdir}/${params.mode}/extract_dapi" }, + mode: params.publish_dir_mode, + ] + } + + withName: '.*CONVERT_MASK_UINT32.*|.*CONVERT_CELLS_MASK.*|.*CONVERT_NUCLEI_MASK.*' { + publishDir = [ + path: { "${params.outdir}/${params.mode}/convert_mask" }, + mode: params.publish_dir_mode, + ] + } + + // ---------------------------- opt ----------------------------------------- + + withName: OPT_FLIP { + publishDir = [ + path: { "${params.outdir}/${params.mode}/opt/flip" }, + mode: params.publish_dir_mode, + ] + } + + withName: OPT_TRACK { + publishDir = [ + path: { "${params.outdir}/${params.mode}/opt/track" }, + mode: params.publish_dir_mode, + ] + } + + withName: OPT_STAT { + publishDir = [ + path: { "${params.outdir}/${params.mode}/opt/stat" }, + mode: params.publish_dir_mode, + ] + } } diff --git a/conf/test.config b/conf/test.config index bfcc3864..e317612d 100644 --- a/conf/test.config +++ b/conf/test.config @@ -5,17 +5,23 @@ Defines input files and everything required to run a fast and simple pipeline test. 
Use as follows: - nextflow run nf-core/spatialxe -profile test, --outdir + nextflow run nf-core/spatialaxe -profile test, --mode --outdir ---------------------------------------------------------------------------------------- */ process { + resourceLimits = [ cpus: 4, - memory: '15.GB', - time: '1.h' + memory: '8.GB', + time: '2.h', ] + + withName: UNTAR { + ext.prefix = "test_run" + } + } params { @@ -23,8 +29,7 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv'// Genome references - genome = 'R64-1-1' + input = "${projectDir}/assets/samplesheet.csv" + outdir = 'results' + mode = 'coordinate' } diff --git a/conf/test_coordinate_mode.config b/conf/test_coordinate_mode.config new file mode 100644 index 00000000..cccdae57 --- /dev/null +++ b/conf/test_coordinate_mode.config @@ -0,0 +1,31 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/spatialaxe -profile test, --mode --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + + resourceLimits = [ + cpus: 4, + memory: '8.GB', + time: '2.h', + ] + +} + +params { + config_profile_name = 'Test profile coordinate mode' + config_profile_description = 'Minimal test dataset to check pipeline function in the coordinate mode' + + // Input data + input = "${projectDir}/assets/samplesheet.csv" + outdir = 'results' + mode = 'coordinate' +} diff --git a/conf/test_full.config b/conf/test_full.config index 4ddba9a2..68ce4186 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -5,7 +5,7 @@ Defines input files and everything required to run a full size pipeline test. Use as follows: - nextflow run nf-core/spatialxe -profile test_full, --outdir + nextflow run nf-core/spatialaxe -profile test_full, --outdir ---------------------------------------------------------------------------------------- */ @@ -14,11 +14,8 @@ params { config_profile_name = 'Full test profile' config_profile_description = 'Full test dataset to check pipeline function' - // Input data for full size test - // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. 
SRA) - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' - - // Genome references - genome = 'R64-1-1' + // Input data + input = "${projectDir}/assets/samplesheet.csv" + outdir = 'results' + mode = 'coordinate' } diff --git a/conf/test_image_mode.config b/conf/test_image_mode.config new file mode 100644 index 00000000..ff9199d1 --- /dev/null +++ b/conf/test_image_mode.config @@ -0,0 +1,31 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/spatialaxe -profile test, --mode --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + + resourceLimits = [ + cpus: 4, + memory: '8.GB', + time: '2.h', + ] + +} + +params { + config_profile_name = 'Test profile image mode' + config_profile_description = 'Minimal test dataset to check pipeline function in the image mode' + + // Input data + input = "${projectDir}/assets/samplesheet.csv" + outdir = 'results' + mode = 'image' +} diff --git a/conf/test_preview_mode.config b/conf/test_preview_mode.config new file mode 100644 index 00000000..5f312892 --- /dev/null +++ b/conf/test_preview_mode.config @@ -0,0 +1,31 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/spatialaxe -profile test, --mode --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + + resourceLimits = [ + cpus: 4, + memory: '8.GB', + time: '2.h', + ] + +} + +params { + config_profile_name = 'Test profile preview mode' + config_profile_description = 'Minimal test dataset to check pipeline function in the preview mode' + + // Input data + input = "${projectDir}/assets/samplesheet.csv" + outdir = 'results' + mode = 'preview' +} diff --git a/conf/test_segfree_mode.config b/conf/test_segfree_mode.config new file mode 100644 index 00000000..4576929d --- /dev/null +++ b/conf/test_segfree_mode.config @@ -0,0 +1,31 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/spatialaxe -profile test, --mode --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + + resourceLimits = [ + cpus: 4, + memory: '8.GB', + time: '2.h', + ] + +} + +params { + config_profile_name = 'Test profile segfree mode' + config_profile_description = 'Minimal test dataset to check pipeline function in the segfree mode' + + // Input data + input = "${projectDir}/assets/samplesheet.csv" + outdir = 'results' + mode = 'segfree' +} diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md deleted file mode 100644 index f87187c8..00000000 --- a/docs/CONTRIBUTING.md +++ /dev/null @@ -1,185 +0,0 @@ ---- -title: Contributing -markdownPlugin: checklist ---- - -# `nf-core/spatialxe`: Contributing guidelines - -Hi there! -Thanks for taking an interest in improving nf-core/spatialxe. 
- -This page describes the recommended nf-core way to contribute to both nf-core/spatialxe and nf-core pipelines in general, including: - -- [General contribution guidelines](#general-contribution-guidelines): common procedures or guides across all nf-core pipelines. -- [Pipeline-specific contribution guidelines](#pipeline-specific-contribution-guidelines): procedures or guides specific to the development conventions of nf-core/spatialxe. - -> [!NOTE] -> If you need help using or modifying nf-core/spatialxe, ask on the nf-core Slack [#spatialxe](https://nfcore.slack.com/channels/spatialxe) channel ([join our Slack here](https://nf-co.re/join/slack)). - -## General contribution guidelines - -### Contribution quick start - -To contribute code to any nf-core pipeline: - -- [ ] Ensure you have Nextflow, nf-core tools, and nf-test installed. See the [nf-core/tools repository](https://github.com/nf-core/tools) for instructions. -- [ ] Check whether a GitHub [issue](https://github.com/nf-core/spatialxe/issues) about your idea already exists. If an issue does not exist, create one so that others are aware you are working on it. -- [ ] [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [nf-core/spatialxe repository](https://github.com/nf-core/spatialxe) to your GitHub account. -- [ ] Create a branch on your forked repository and make your changes following [pipeline conventions](#pipeline-contribution-conventions) (if applicable). -- [ ] To fix major bugs, name your branch `patch` and follow the [patch release](#patch-release) process. -- [ ] Update relevant documentation within the `docs/` folder, use nf-core/tools to update `nextflow_schema.json`, and update `CITATIONS.md`. -- [ ] Run and/or update tests. See [Testing](#testing) for more information. -- [ ] [Lint](#lint-tests) your code with nf-core/tools. -- [ ] Submit a pull request (PR) against the `dev` branch and request a review. 
- -If you are not used to this workflow with Git, see the [GitHub documentation](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests) or [Git resources](https://try.github.io/) for more information. - -## Use of AI and LLMs - -The nf-core stance on the use of AI and LLMs is that humans are still ultimately responsible for their submitted code, regardless of the tools they use. - -If you’re using AI tools, try to stick by these guidelines: - -- Keep PRs as small and focussed as possible -- Avoid any unnecessary changes, such as moving or refactoring code (unless that is the explicit intention of the PR) -- Review all generated code yourself before opening a PR, and ensure that you understand it -- Engage with the community review process and expect to make revisions - -For more detail, see the the [blog post](https://nf-co.re/blog/2026/statement-on-ai) for a statement from the nf-core/core team. - -### Getting help - -For further information and help, see the [nf-core/spatialxe documentation](https://nf-co.re/spatialxe/usage) or ask on the nf-core [#spatialxe](https://nfcore.slack.com/channels/spatialxe) Slack channel ([join our Slack here](https://nf-co.re/join/slack)). - -### GitHub Codespaces - -You can contribute to nf-core/spatialxe without installing a local development environment on your machine by using [GitHub Codespaces](https://github.com/codespaces). - -[GitHub Codespaces](https://github.com/codespaces) is an online developer environment that runs in your browser, complete with VS Code and a terminal. -Most nf-core repositories include a devcontainer configuration, which creates a GitHub Codespaces environment specifically for Nextflow development. -The environment includes pre-installed nf-core tools, Nextflow, and a few other helpful utilities via a Docker container. - -To get started, open the repository in [Codespaces](https://github.com/nf-core/spatialxe/codespaces). 
- -### Testing - -Once you have made your changes, run the pipeline with nf-test to test them locally. -For additional information, use the `--verbose` flag to view the Nextflow console log output. - -```bash -nf-test test --tag test --profile +docker --verbose -``` - -If you have added new functionality, ensure you update the test assertions in the `.nf.test` files in the `tests/` directory. -Update the snapshots with the following command: - -```bash -nf-test test --tag test --profile +docker --verbose --update-snapshots -``` - -When you create a pull request with changes, GitHub Actions will run automatic tests. -Pull requests are typically reviewed when these tests are passing. - -Two types of tests are typically run: - -#### Lint tests - -nf-core has a [set of guidelines](https://nf-co.re/docs/specifications/overview) which all pipelines must follow. -To enforce these, run linting with nf-core/tools: - -```bash -nf-core pipelines lint -``` - -If you encounter failures or warnings, follow the linked documentation printed to screen. -For more information about linting tests, see [nf-core/tools API documentation](https://nf-co.re/docs/nf-core-tools/api_reference/latest/pipeline_lint_tests/actions_awsfulltest). - -#### Pipeline tests - -Each nf-core pipeline should be set up with a minimal set of test data. -GitHub Actions runs the pipeline on this data to ensure it runs through and exits successfully. -If there are any failures then the automated tests fail. -These tests are run with the latest available version of Nextflow and the minimum required version specified in the pipeline code. - -### Patch release - -> [!WARNING] -> Only in the unlikely event of a release that contains a critical bug. - -- [ ] Create a new branch `patch` on your fork based on `upstream/main` or `upstream/master`. -- [ ] Fix the bug and use nf-core/tools to bump the version to the next semantic version, for example, `1.2.3` → `1.2.4`. 
-- [ ] Open a Pull Request from `patch` directly to `main`/`master` with the changes. - -### Pipeline contribution conventions - -nf-core semi-standardises how you write code and other contributions to make the nf-core/spatialxe code and processing logic more understandable for new contributors and to ensure quality. - -#### Add a new pipeline step - -To contribute a new step to the pipeline, follow the general nf-core coding procedure. -Please also refer to the [pipeline-specific contribution guidelines](#pipeline-specific-contribution-guidelines): - -- [ ] Define the corresponding [input channel](#channel-naming-schemes) into your new process from the expected previous process channel. -- [ ] Install a module with nf-core/tools, or write a local module (see [default processes resource requirements](#default-processes-resource-requirements)), and add it to the target `.nf`. -- [ ] Define the output channel if needed. Mix the version output channel into `ch_versions` and relevant files into `ch_multiqc`. -- [ ] Add new or updated parameters to `nextflow.config` with a [default value](#default-parameter-values). -- [ ] Add new or updated parameters and relevant help text to `nextflow_schema.json` with [nf-core/tools](#default-parameter-values). -- [ ] Add validation for relevant parameters to the pipeline utilisation section of `utils_nfcore_\_pipeline/main.nf` subworkflow. -- [ ] Perform local tests to validate that the new code works as expected. - - [ ] If applicable, add a new test in the `tests` directory. -- [ ] Update `usage.md`, `output.md`, and `citation.md` as appropriate. -- [ ] [Lint](lint) the code with nf-core/tools. -- [ ] Update any diagrams or pipeline images as necessary. -- [ ] Update MultiQC config `assets/multiqc_config.yml` so relevant suffixes, file name cleanup, and module plots are in the appropriate order. -- [ ] If applicable, create a [MultiQC](https://seqera.io/multiqc/) module. 
-- [ ] Add a description of the output files and, if relevant, images from the MultiQC report to `docs/output.md`. - -To update the minimum required Nextflow version, see the [Nextflow version bumping](#nextflow-version-bumping) section below. For more information about pipeline contributions, see [pipeline-specific contribution guidelines](#pipeline-specific-contribution-guidelines). - -#### Channel naming schemes - -Use the following naming schemes for channels to make the channel flow easier to understand: - -- Initial process channel: `ch_output_from_` -- Intermediate and terminal channels: `ch__for_` - -#### Default parameter values - -Parameters should be initialised and defined with default values within the `params` scope in `nextflow.config`. -They should also be documented in the pipeline JSON schema. - -To update `nextflow_schema.json`, run: - -```bash -nf-core pipelines schema build -``` - -The schema builder interface that loads in your browser should automatically update the defaults in the parameter documentation. - -#### Default processes resource requirements - -If you write a local module, specify a default set of resource requirements for the process. - -Sensible defaults for process resource requirements (CPUs, memory, time) should be defined in `conf/base.config`. -Specify these with generic `withLabel:` selectors, so they can be shared across multiple processes and steps of the pipeline. - -nf-core provides a set of standard labels that you should follow where possible, as seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/main/nf_core/pipeline-template/conf/base.config). -These labels define resource defaults for single-core processes, modules that require a GPU, and different levels of multi-core configurations with increasing memory requirements. 
- -Values assigned within these labels can be dynamically passed to a tool using the the `${task.cpus}` and `${task.memory}` Nextflow variables in the `script:` block of a module (see an example in the [modules repository](https://github.com/nf-core/modules/blob/bd1b6a40f55933d94b8c9ca94ec8c1ea0eaf4b82/modules/nf-core/samtools/bam2fq/main.nf#L30)). - -#### Nextflow version bumping - -If you use a new feature from core Nextflow, bump the minimum required Nextflow version in the pipeline with: - -```bash -nf-core pipelines bump-version --nextflow . -``` - -#### Images and figures guidelines - -If you update images or graphics, follow the nf-core [style guidelines](https://nf-co.re/docs/community/brand/workflow-schematics). - -## Pipeline specific contribution guidelines - - diff --git a/docs/README.md b/docs/README.md index 2f7c904b..c7b0845a 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,6 +1,6 @@ -# nf-core/spatialxe: Documentation +# nf-core/spatialaxe: Documentation -The nf-core/spatialxe documentation is split into the following pages: +The nf-core/spatialaxe documentation is split into the following pages: - [Usage](usage.md) - An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. 
diff --git a/docs/images/nf-core-spatialaxe_logo_dark.png b/docs/images/nf-core-spatialaxe_logo_dark.png new file mode 100644 index 00000000..52ecaa70 Binary files /dev/null and b/docs/images/nf-core-spatialaxe_logo_dark.png differ diff --git a/docs/images/nf-core-spatialaxe_logo_light.png b/docs/images/nf-core-spatialaxe_logo_light.png new file mode 100644 index 00000000..67bdab99 Binary files /dev/null and b/docs/images/nf-core-spatialaxe_logo_light.png differ diff --git a/docs/images/nf-core-spatialxe_logo_dark.png b/docs/images/nf-core-spatialxe_logo_dark.png deleted file mode 100644 index 6a6a051e..00000000 Binary files a/docs/images/nf-core-spatialxe_logo_dark.png and /dev/null differ diff --git a/docs/images/nf-core-spatialxe_logo_light.png b/docs/images/nf-core-spatialxe_logo_light.png deleted file mode 100644 index e933cdfa..00000000 Binary files a/docs/images/nf-core-spatialxe_logo_light.png and /dev/null differ diff --git a/docs/images/spatialaxe-logo.png b/docs/images/spatialaxe-logo.png new file mode 100644 index 00000000..16876b2a Binary files /dev/null and b/docs/images/spatialaxe-logo.png differ diff --git a/docs/images/spatialaxe-logo.svg b/docs/images/spatialaxe-logo.svg new file mode 100644 index 00000000..1a4d75d2 --- /dev/null +++ b/docs/images/spatialaxe-logo.svg @@ -0,0 +1,224 @@ + + diff --git a/docs/images/spatialaxe-metromap.png b/docs/images/spatialaxe-metromap.png new file mode 100644 index 00000000..860b331e Binary files /dev/null and b/docs/images/spatialaxe-metromap.png differ diff --git a/docs/images/spatialaxe-metromap.svg b/docs/images/spatialaxe-metromap.svg new file mode 100644 index 00000000..7a09ca75 --- /dev/null +++ b/docs/images/spatialaxe-metromap.svg @@ -0,0 +1,3270 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + vcfvcftxtXenium onboardanalysis (XOA)vcfXR importsegmentationDatabundlehtmlQC reportjsonMetadatajsongene panelBundle(redefined)htmlMultiQCSpatialDatabundleXR 
relabelSpatialDataSpatialDataSpatialDataQuality ControlOnboardanalysis (OA)Default workflow for image modeDefault workflow for coordinate modeDefault workflow for segfree modeOptional stepsOutputsInputsmode: imageXR resegmentStarDistCellposeBaysorCellposevcfvcftiffimageBaysorpriorsegmentationmasksegmentationpolygon/csvsegmentationpolygon/csv/maskSeggerProsegBaysorvcfvcfparquettranscriptmoleculesreportBaysor previewhtmlparquettranscriptmoleculesmode: coordinatencvs/loomBaysor segfreeFicturevcfvcfparquettranscriptmoleculesmode: segfreemode: previewmode: coordinate diff --git a/docs/output.md b/docs/output.md index 2a27145d..68c0256a 100644 --- a/docs/output.md +++ b/docs/output.md @@ -1,33 +1,181 @@ -# nf-core/spatialxe: Output +# nf-core/spatialaxe: Output ## Introduction -This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline. +This document describes the output produced by the pipeline. The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. 
- - ## Pipeline overview The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: -- [FastQC](#fastqc) - Raw read QC -- [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline -- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution +- Mode-specific output: + - [image mode](#image-mode) + - [coordinate mode](#coordinate-mode) + - [segfree mode](#segfree-mode) + - [qc mode](#qc-mode) (or using `--run_qc`) + - [preview mode](#preview-mode) +- [Additional functionality of spatialaxe](#additional-functionality): + - [SpatialData](#spatialdata) + - [Xenium Ranger import segmentation](#xenium-ranger-import-segmentation) + - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline + - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution -### FastQC +## Image mode
Output files -- `fastqc/` - - `*_fastqc.html`: FastQC report containing quality metrics. - - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. +- `image/` + - `xeniumranger/` + - `resegment/` + - `${meta.id}/` Directory containing the output xenium bundle of Xenium + - `baysor/` + - `preprocess/` + - `*.csv` filtered transcripts CSV (for Baysor 0.7.1 Parquet.jl compatibility) + - `run/` + - `*segmentation.csv` results of segmentation + - `*.json` file with outlines of segmentation + - `segmentation_params.dump.toml` file with full list of parameters used for the model + - `segmentation_log.log` output file with metadata of running the workflow + - `segmentation_counts.loom` loom file with metadata + - `segmentation_cell_stats.csv` statistics of segmented cells + - `cellpose_cells/` + - `*masks.tif` labelled mask output from cellpose in tif format + - `*flows.tif` cell flow output from cellpose + - `*seg.npy` numpy array with cell segmentation data + - `stardist_nuclei/` + - `*.{tiff,tif}` labelled mask output from stardist in tif format + - `resolift/` + - `*.tiff` path to save the upscaled TIFF file
-[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). +## Coordinate mode + +
+Output files + +- `coordinate/` + - `xenium_patch/` + - `patches/patch_grid.json` patch_grid.json metadata file + - `patches/patch_*/transcripts.parquet` per-patch transcripts.parquet files (one per patch) + - `output/xr-cell-polygons.geojson` stitched cell polygons + - `output/xr-transcript-metadata.csv` transcript metadata + - `proseg/` + - `preset/` + - `cell-polygons.geojson.gz` 2D polygons for each cell in GeoJSON format. These are flattened from 3D + - `expected-counts.csv.gz` cell-by-gene count matrix + - `cell-metadata.csv.gz` cell centroids, volume, and other information + - `transcript-metadata.csv.gz` transcript ids, genes, revised positions, assignment probability + - `gene-metadata.csv.gz` per-gene summary statistics + - `rates.csv.gz` cell-by-gene Poisson rate parameters + - `cell-polygons-layers.geojson.gz` a separate, non-overlapping cell polygon for each z-layer, preserving 3D segmentation + - `cell-hulls.geojson.gz` convex hulls around assigned transcripts + - `proseg2baysor/` + - `xr-cell-polygons.geojson` 2D polygons for each cell in GeoJSON format. These are flattened from 3D + - `xr-transcript-metadata.csv` transcript ids, genes, revised positions, assignment probability + - `segger/` + - `create_dataset/` + - `${meta.id}/` directory to save the processed Segger dataset (in PyTorch Geometric format) + - `train/` + - `${meta.id}/` directory to save the trained model and checkpoints + - `predict/` + - `${meta.id}/` directory to save the segmentation results, including cell boundaries and associations + - `baysor/` + - `run/` + - `*segmentation.csv` results of segmentation + - `*.json` file with outlines of segmentation + - `segmentation_params.dump.toml` file with full list of parameters used for the model + - `segmentation_log.log` output file with metadata of running the workflow + - `segmentation_counts.loom` loom file with metadata + - `segmentation_cell_stats.csv` statistics of segmented cells + +
+ +## Segfree mode + +
+Output files + +- `segfree/` + - `baysor/` + - `preprocess/` + - `*.csv` filtered transcripts CSV (for Baysor 0.7.1 Parquet.jl compatibility) + - `segfree/` + - `ncvs.loom` loom file with neighborhood results + - `ncvs_segfree_log.log` Log file with summary statistics + - `ficture/` + - `preprocess/` + - `processed_transcripts.tsv.gz` transcript file used for FICTURE + - `coordinate_minmax.tsv` listing the min and max of the coordinates used for FICTURE + - `feature.clean.tsv.gz` file containing the (unique) names of genes that should be used for FICTURE + - `${meta.id}/results/` files containing the results of FICTURE + +
+ +## QC mode + +
+Output files + +- `opt/` + - `flip/` + - `*.fa` the forward oriented fasta file + - `track/` + - `*.tsv` TSV file containing the gene and transcript information to which each probe aligns + - `stat/` + - `*.tsv` TSV file containing the summary stats +- `multiqc/` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + - `multiqc_plots/`: directory containing static images from the report in various formats. + +
+ +## Preview mode + +
+Output files + +- `preview/` + - `baysor/` + - `preview/` + - `preview.html` segmentation preview + +
+ +## Additional Functionality + +### SpatialData + +The pipeline creates spatialdata objects (data bundles) at various stages (see metromap in the [README](../README.md)) + +
+Output files + +- `spatialdata/` + - `write/${meta.id}/spatialdata/` spatialdata bundle of the raw data + - `meta/${meta.id}/spatialdata_spatialaxe_final/` spatialdata bundle of the final data with metadata + - `sdata['raw_table'].uns['spatialdata_attrs']` provenance metadata + - `sdata['raw_table'].uns['experiment_xenium']` experimental metadata + - `sdata['raw_table'].uns['gene_panel']` gene panel metadata + +
+ +### Xenium Ranger Import Segmentation + +This step is needed to import segmentations from different methods into the xenium bundle and is called at different stages of the pipeline. + +
+Output files + +- `xeniumranger/` + - `import_segementation/` + - `${meta.id}/` directory containing the output xenium bundle of Xenium Ranger + +
### MultiQC @@ -43,7 +191,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d [MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. -Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQC. The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see . +The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see . ### Pipeline information diff --git a/docs/usage.md b/docs/usage.md index 9050ce93..02523048 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -1,63 +1,155 @@ -# nf-core/spatialxe: Usage +# nf-core/spatialaxe: Usage -## :warning: Please read this documentation on the nf-core website: [https://nf-co.re/spatialxe/usage](https://nf-co.re/spatialxe/usage) +## :warning: Please read this documentation on the nf-core website: [https://nf-co.re/spatialaxe/usage](https://nf-co.re/spatialaxe/usage) > _Documentation of pipeline parameters is generated automatically from the pipeline schema and can no longer be found in markdown files._ ## Introduction - - ## Samplesheet input -You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. +You will need to create a samplesheet with information about the sample you would like to analyse before running the pipeline. 
It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. + +```csv title="samplesheet.csv" +sample,bundle,image +breast_cancer,/path/to/xenium/bundle,/path/to/morphology.ome.tif +``` + +| Column | Description | +| -------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `sample` | `Required`. Custom sample name. It is recommended to follow the same name from the output of the Xenium Onboard Analysis (XOA). Avoid using spaces in the sample name. | +| `bundle` | `Required`. Full path to the Xenium bundle, output of the Xenium Onboard Analysis. | +| `image` | `Optional`. Full path to morphology.ome.tif. If not provided, the morphology.ome.tif from the bundle is considered. | + +An [example samplesheet](../assets/example_samplesheet.csv) has been provided with the pipeline. + +#### Using the samplesheet ```bash --input '[path to samplesheet file]' ``` -### Multiple runs of the same sample +## Running the pipeline -The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes: +The typical command for running the pipeline is as follows: -```csv title="samplesheet.csv" -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz +#### Image-based segmentation mode + +This runs the default image mode:
+`CELLPOSE ➔ BAYSOR ➔ XR-IMPORT-SEGMENTATION ➔ SPATIALDATA ➔ QC` + +```bash +nextflow run nf-core/spatialaxe \ + -profile + --input ./samplesheet.csv \ + --outdir ./results \ + --mode image ``` -### Full samplesheet +#### Coordinate-based (transcripts-based) segmentation mode + +This runs the default coordinate mode:
+`PROSEG ➔ PROSEG2BAYSOR ➔ XR-IMPORT-SEGMENTATION ➔ SPATIALDATA ➔ QC` -The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. +```bash +nextflow run nf-core/spatialaxe \ + -profile + --input ./samplesheet.csv \ + --outdir ./results \ + --mode coordinate +``` -A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. +### Segmentation free mode -```csv title="samplesheet.csv" -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz -CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz -TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz, -TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz, +`BAYSOR_SEGFREE` + +```bash +nextflow run nf-core/spatialaxe \ + -profile \ + --input samplesheet.csv \ + --outdir \ + --mode segfree ``` -| Column | Description | -| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | -| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `fastq_2` | Full path to FastQ file for Illumina short reads 2. 
File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +### Preview mode
-An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. +`BAYSOR_PREVIEW` -## Running the pipeline +```bash +nextflow run nf-core/spatialaxe \ + -profile \ + --input samplesheet.csv \ + --outdir \ + --mode preview +``` -The typical command for running the pipeline is as follows: +### Quality control
+ +It is possible to run the quality control with `--run_qc` to couple it with another mode like so: + +```bash +nextflow run nf-core/spatialaxe \ + -profile + --input ./samplesheet.csv \ + --outdir ./results \ + --mode image \ + --run_qc +``` + +It is also possible to run just the QC with: + +```bash +nextflow run nf-core/spatialaxe \ + -profile \ + --input samplesheet.csv \ + --outdir \ + --mode qc +``` + +- QC methods: + - [MultiQC Xenium Extra Plugin](https://github.com/MultiQC/xenium-extra) + - [OPT](https://github.com/JEFworks-Lab/off-target-probe-tracker) + +### Image-based Segmentation mode (--mode image):
+ +- cellpose +- baysor +- xeniumranger +- stardist + +### Coordinate-based (transcripts-based) Segmentation methods (--mode coordinate):
+ +- proseg +- baysor +- segger + +### Segmentation free methods (--mode segfree):
+ +- baysor +- ficture + +#### Run Segmentation with the methods mentioned above:
+ +eg: To run proseg segmentation use the `coordinate` mode and the `proseg` segmentation method (--method) + +```bash +nextflow run nf-core/spatialaxe \ + -profile + --input ./samplesheet.csv \ + --outdir ./results \ + --mode coordinate \ + --method proseg +``` + +eg: To run cellpose segmentation use the `image` mode and the `cellpose` segmentation method (--method) ```bash -nextflow run nf-core/spatialxe --input ./samplesheet.csv --outdir ./results --genome GRCh37 -profile docker +nextflow run nf-core/spatialaxe \ + -profile + --input ./samplesheet.csv \ + --outdir ./results \ + --mode image \ + --method cellpose ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. @@ -76,12 +168,12 @@ If you wish to repeatedly use the same parameters for multiple runs, rather than Pipeline settings can be provided in a `yaml` or `json` file via `-params-file `. > [!WARNING] -> Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/running/run-pipelines#configuring-pipelines), other infrastructural tweaks (such as output directories), or module arguments (args). +> Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). 
The above pipeline run specified with a params file in yaml format: ```bash -nextflow run nf-core/spatialxe -profile docker -params-file params.yaml +nextflow run nf-core/spatialaxe -profile docker -params-file params.yaml ``` with: @@ -89,7 +181,6 @@ with: ```yaml title="params.yaml" input: './samplesheet.csv' outdir: './results/' -genome: 'GRCh37' <...> ``` @@ -100,14 +191,14 @@ You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-c When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: ```bash -nextflow pull nf-core/spatialxe +nextflow pull nf-core/spatialaxe ``` ### Reproducibility It is a good idea to specify the pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. -First, go to the [nf-core/spatialxe releases page](https://github.com/nf-core/spatialxe/releases) and find the latest pipeline version - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. Of course, you can switch to another version by changing the number after the `-r` flag. +First, go to the [nf-core/spatialaxe releases page](https://github.com/nf-core/spatialaxe/releases) and find the latest pipeline version - numeric only (eg. `1.0.0`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.0.0`. Of course, you can switch to another version by changing the number after the `-r` flag. 
This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports. @@ -173,19 +264,19 @@ Specify the path to a specific config file (this is a core Nextflow command). Se Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the pipeline steps, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher resources request (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. -To change the resource requests, please see the [max resources](https://nf-co.re/docs/running/configuration/nextflow-for-your-system#set-max-resources) and [customise process resources](https://nf-co.re/docs/running/configuration/nextflow-for-your-system#customize-process-resources) section of the nf-core website. +To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) section of the nf-core website. ### Custom Containers In some cases, you may wish to change the container or conda environment used by a pipeline steps for a particular tool. By default, nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However, in some cases the pipeline specified version maybe out of date. 
-To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/running/configuration/nextflow-for-your-system#update-tool-versions) section of the nf-core website. +To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website. ### Custom Tool Arguments A pipeline might not always support every possible argument or option of a particular tool used in pipeline. Fortunately, nf-core pipelines provide some freedom to users to insert additional parameters that the pipeline does not include by default. -To learn how to provide additional arguments to a particular tool of the pipeline, please see the [customising tool arguments](https://nf-co.re/docs/running/configuration/nextflow-for-your-system#modifying-tool-arguments) section of the nf-core website. +To learn how to provide additional arguments to a particular tool of the pipeline, please see the [customising tool arguments](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) section of the nf-core website. 
### nf-core/configs diff --git a/main.nf b/main.nf index 0ed1167a..ed5c9d42 100644 --- a/main.nf +++ b/main.nf @@ -1,11 +1,11 @@ #!/usr/bin/env nextflow /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - nf-core/spatialxe + nf-core/spatialaxe ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Github : https://github.com/nf-core/spatialxe - Website: https://nf-co.re/spatialxe - Slack : https://nfcore.slack.com/channels/spatialxe + Github : https://github.com/nf-core/spatialaxe + Website: https://nf-co.re/spatialaxe + Slack : https://nfcore.slack.com/channels/spatialaxe ---------------------------------------------------------------------------------------- */ @@ -15,21 +15,9 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { SPATIALXE } from './workflows/spatialxe' -include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_spatialxe_pipeline' -include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_spatialxe_pipeline' -include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_spatialxe_pipeline' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - GENOME PARAMETER VALUES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// TODO nf-core: Remove this line if you don't need a FASTA file -// This is an example of how to use getGenomeAttribute() to fetch parameters -// from igenomes.config using `--genome` -params.fasta = getGenomeAttribute('fasta') +include { SPATIALAXE } from './workflows/spatialaxe.nf' +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_spatialaxe_pipeline' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_spatialaxe_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -40,7 +28,7 
@@ params.fasta = getGenomeAttribute('fasta') // // WORKFLOW: Run main analysis pipeline depending on type of input // -workflow NFCORE_SPATIALXE { +workflow NFCORE_SPATIALAXE { take: samplesheet // channel: samplesheet read in from --input @@ -50,15 +38,50 @@ workflow NFCORE_SPATIALXE { // // WORKFLOW: Run pipeline // - SPATIALXE ( + SPATIALAXE ( samplesheet, + params.alignment_csv, + params.baysor_config, + params.baysor_prior, + params.baysor_scale, + params.baysor_tiling, + params.baysor_tiling_scale, + params.buffer_samples, + params.buffer_size, + params.cell_segmentation_only, + params.cellpose_downscale, + params.cellpose_model, + params.expansion_distance, + params.features, + params.gene_panel, + params.gene_synonyms, + params.max_x, + params.max_y, + params.method, + params.min_qv, + params.min_x, + params.min_y, + params.mode, params.multiqc_config, params.multiqc_logo, params.multiqc_methods_description, + params.nucleus_segmentation_only, + params.offtarget_probe_tracking, params.outdir, + params.probes_fasta, + params.qupath_polygons, + params.reference_annotations, + params.relabel_genes, + params.run_qc, + params.segger_model, + params.segmentation_mask, + params.sharpen_tiff, + params.stardist_nuclei_model, + params.tiling, + params.xeniumranger_only, ) emit: - multiqc_report = SPATIALXE.out.multiqc_report // channel: /path/to/multiqc_report.html + multiqc_report = SPATIALAXE.out.multiqc_report // channel: /path/to/multiqc_report.html } /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -81,13 +104,26 @@ workflow { params.input, params.help, params.help_full, - params.show_hidden + params.show_hidden, + params.format, + params.gene_panel, + params.gene_synonyms, + params.image_seg_methods, + params.method, + params.mode, + params.nucleus_segmentation_only, + params.offtarget_probe_tracking, + params.probes_fasta, + params.reference_annotations, + params.relabel_genes, + params.segmentation_mask, + 
params.transcript_seg_methods, ) // // WORKFLOW: Run main workflow // - NFCORE_SPATIALXE ( + NFCORE_SPATIALAXE ( PIPELINE_INITIALISATION.out.samplesheet ) // @@ -99,7 +135,8 @@ workflow { params.plaintext_email, params.outdir, params.monochrome_logs, - NFCORE_SPATIALXE.out.multiqc_report + params.hook_url, + NFCORE_SPATIALAXE.out.multiqc_report ) } diff --git a/modules.json b/modules.json index 09082eaf..c215c2ed 100644 --- a/modules.json +++ b/modules.json @@ -1,18 +1,71 @@ { - "name": "nf-core/spatialxe", - "homePage": "https://github.com/nf-core/spatialxe", + "name": "nf-core/spatialaxe", + "homePage": "https://github.com/nf-core/spatialaxe", "repos": { "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { - "fastqc": { + "cellpose": { "branch": "master", - "git_sha": "6d46786420b4d7bc88eba026eb389c0c5535d120", - "installed_by": ["modules"] + "git_sha": "0780b963d3ab087e861a4b74e9d0e404115e5352", + "installed_by": ["modules"], + "patch": "modules/nf-core/cellpose/cellpose.diff" }, "multiqc": { "branch": "master", - "git_sha": "008f9d3e61209bf995edac3ba531f54e269e1215", + "git_sha": "cb9f4bec379866ca560f7f79d9a9a06bea8c9b51", + "installed_by": ["modules"], + "patch": "modules/nf-core/multiqc/multiqc.diff" + }, + "opt/flip": { + "branch": "master", + "git_sha": "7d3e5c9d3d44", + "installed_by": ["modules"], + "patch": "modules/nf-core/opt/flip/opt-flip.diff" + }, + "opt/stat": { + "branch": "master", + "git_sha": "7d3e5c9d3d44", + "installed_by": ["modules"], + "patch": "modules/nf-core/opt/stat/opt-stat.diff" + }, + "opt/track": { + "branch": "master", + "git_sha": "7d3e5c9d3d44", + "installed_by": ["modules"], + "patch": "modules/nf-core/opt/track/opt-track.diff" + }, + "stardist": { + "branch": "master", + "git_sha": "4e783502ab661bed13f15189401b73c93966831f", + "installed_by": ["modules"], + "patch": "modules/nf-core/stardist/stardist.diff" + }, + "untar": { + "branch": "master", + "git_sha": "447f7bc0fa41dfc2400c8cad4c0291880dc060cf", + 
"installed_by": ["modules"], + "patch": "modules/nf-core/untar/untar.diff" + }, + "unzip": { + "branch": "master", + "git_sha": "4dd9d8439a429c7ee566e0e2347f76ddeef27e66", + "installed_by": ["modules"], + "patch": "modules/nf-core/unzip/unzip.diff" + }, + "xeniumranger/import-segmentation": { + "branch": "master", + "git_sha": "39365e944e936511e33b993cdd978e0f12adac9a", + "installed_by": ["modules"] + }, + "xeniumranger/relabel": { + "branch": "master", + "git_sha": "39365e944e936511e33b993cdd978e0f12adac9a", + "installed_by": ["modules"] + }, + "xeniumranger/resegment": { + "branch": "master", + "git_sha": "39365e944e936511e33b993cdd978e0f12adac9a", "installed_by": ["modules"] } } @@ -26,12 +79,12 @@ }, "utils_nfcore_pipeline": { "branch": "master", - "git_sha": "a3fb7351b1fdb2b1de282b765816bbea190e86a8", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["subworkflows"] }, "utils_nfschema_plugin": { "branch": "master", - "git_sha": "fdc08b8b1ae74f56686ce21f7ea11ad11990ce57", + "git_sha": "4b406a74dc0449c0401ed87d5bfff4252fd277fd", "installed_by": ["subworkflows"] } } diff --git a/modules/local/baysor/create_dataset/main.nf b/modules/local/baysor/create_dataset/main.nf new file mode 100644 index 00000000..7e15ce8b --- /dev/null +++ b/modules/local/baysor/create_dataset/main.nf @@ -0,0 +1,45 @@ +process BAYSOR_CREATE_DATASET { + tag "${meta.id}" + label 'process_medium' + + container "khersameesh24/baysor:0.7.1" + + input: + tuple val(meta), path(transcripts) + val sample_fraction + + output: + tuple val(meta), path("${prefix}/sampled_transcripts.csv"), emit: sampled_transcripts + tuple val("${task.process}"), val('baysor'), eval("baysor --version 2>&1 | grep -oP '\\d+\\.\\d+\\.\\d+' || echo unknown"), topic: versions, emit: versions_baysor + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 
'mamba']).size() >= 1) { + error("BAYSOR_CREATE_DATASET module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + prefix = task.ext.prefix ?: "${meta.id}" + + """ + baysor_create_dataset.py \\ + --transcripts ${transcripts} \\ + --sample-fraction ${sample_fraction} \\ + --prefix ${prefix} + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("BAYSOR_CREATE_DATASET module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p ${prefix} + touch "${prefix}/sampled_transcripts.csv" + """ +} diff --git a/modules/local/baysor/create_dataset/meta.yml b/modules/local/baysor/create_dataset/meta.yml new file mode 100644 index 00000000..f520c92b --- /dev/null +++ b/modules/local/baysor/create_dataset/meta.yml @@ -0,0 +1,82 @@ +name: "baysor_create_dataset" +description: Subsample a transcripts CSV to create a smaller Baysor input dataset. +keywords: + - xenium + - baysor + - dataset + - transcripts + - subsample +tools: + - "python": + description: | + Python programming language interpreter. + homepage: "https://www.python.org/" + documentation: "https://docs.python.org/3/" + tool_dev_url: "https://github.com/python/cpython" + doi: "no DOI available" + licence: ["PSF-2.0"] + identifier: "" + - "baysor": + description: | + Bayesian Segmentation of Spatial Transcriptomics Data. + homepage: "https://kharchenkolab.github.io/Baysor/dev/" + documentation: "https://kharchenkolab.github.io/Baysor/dev/" + tool_dev_url: "https://github.com/kharchenkolab/Baysor" + doi: "10.1038/s41587-021-01044-w" + licence: ["MIT"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - transcripts: + type: file + description: Transcripts CSV file to subsample for Baysor dataset creation. + pattern: "*.csv" + ontologies: [] + - sample_fraction: + type: float + description: Fraction of transcripts to retain in the subsampled dataset (0-1). + +output: + sampled_transcripts: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "${prefix}/sampled_transcripts.csv": + type: file + description: Subsampled transcripts CSV used as Baysor input dataset. + pattern: "*/sampled_transcripts.csv" + ontologies: [] + versions_baysor: + - - ${task.process}: + type: string + description: The process the versions were collected from + - python: + type: string + description: The tool name + - "baysor --version 2>&1 | grep -oP '\\d+\\.\\d+\\.\\d+' || echo unknown": + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - baysor: + type: string + description: The tool name + - "baysor --version 2>&1 | grep -oP '\\d+\\.\\d+\\.\\d+' || echo unknown": + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@khersameesh24" +maintainers: + - "@khersameesh24" diff --git a/modules/local/baysor/create_dataset/tests/main.nf.test b/modules/local/baysor/create_dataset/tests/main.nf.test new file mode 100644 index 00000000..e5fc8214 --- /dev/null +++ b/modules/local/baysor/create_dataset/tests/main.nf.test @@ -0,0 +1,60 @@ +nextflow_process { + + name "Test Process BAYSOR CREATE_DATASET" + script "../main.nf" + process "BAYSOR_CREATE_DATASET" + config "./nextflow.config" + + tag "modules" + tag "modules_local" + tag "baysor" + tag "baysor/create_dataset" + + test("baysor create dataset - transcripts.parquet") { + + when { + process { + """ + input[0] = channel.of([ + [id: "test_run_baysor"], + 
file(params.modules_testdata_base_path + "spatial_omics/xenium/homo_sapiens/spatial_gene_expression.csv", checkIfExists: true) + ]) + input[1] = 0.3 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert file(process.out.sampled_transcripts[0][1]).exists() }, + { assert file(process.out.sampled_transcripts[0][1]).name == "sampled_transcripts.csv" }, + { assert snapshot(process.out.versions_baysor).match("versions") } + ) + } + } + + test("baysor create dataset stub") { + + options "-stub" + + when { + process { + """ + input[0] = channel.of([ + [id: "test_run_baysor"], + file(params.modules_testdata_base_path + "spatial_omics/xenium/homo_sapiens/spatial_gene_expression.csv", checkIfExists: true) + ]) + input[1] = 0.3 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions_baysor).match("versions_stub") } + ) + } + } +} diff --git a/modules/local/baysor/create_dataset/tests/main.nf.test.snap b/modules/local/baysor/create_dataset/tests/main.nf.test.snap new file mode 100644 index 00000000..1fb129a3 --- /dev/null +++ b/modules/local/baysor/create_dataset/tests/main.nf.test.snap @@ -0,0 +1,34 @@ +{ + "versions_stub": { + "content": [ + [ + [ + "BAYSOR_CREATE_DATASET", + "baysor", + "0.7.1" + ] + ] + ], + "timestamp": "2026-06-17T13:16:19.929465332", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.2" + } + }, + "versions": { + "content": [ + [ + [ + "BAYSOR_CREATE_DATASET", + "baysor", + "0.7.1" + ] + ] + ], + "timestamp": "2026-06-17T13:16:11.036406226", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.2" + } + } +} \ No newline at end of file diff --git a/modules/local/baysor/create_dataset/tests/nextflow.config b/modules/local/baysor/create_dataset/tests/nextflow.config new file mode 100644 index 00000000..f8b3a30a --- /dev/null +++ b/modules/local/baysor/create_dataset/tests/nextflow.config @@ -0,0 +1,9 @@ +process { + + resourceLimits = [ + cpus: 4, + memory: '8.GB', + time: 
'2.h', + ] + +} diff --git a/modules/local/baysor/preprocess/main.nf b/modules/local/baysor/preprocess/main.nf new file mode 100644 index 00000000..cfe6fe3b --- /dev/null +++ b/modules/local/baysor/preprocess/main.nf @@ -0,0 +1,55 @@ +process BAYSOR_PREPROCESS_TRANSCRIPTS { + tag "${meta.id}" + label 'process_medium' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/94/9409ce399922a5746bea1b7df5668c3d1d79b9af49a15950d9818c4fe45ac749/data' : + 'community.wave.seqera.io/library/pandas_procs_pyarrow:d8f882b65dfea451' }" + + input: + tuple val(meta), path(transcripts) + val min_qv + val max_x + val min_x + val max_y + val min_y + + output: + tuple val(meta), path("${prefix}/filtered_transcripts.csv"), emit: transcripts_file + tuple val("${task.process}"), val('python'), eval("python3 --version | sed 's/Python //'"), topic: versions, emit: versions_python + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("BAYSOR_PREPROCESS_TRANSCRIPTS module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + prefix = task.ext.prefix ?: "${meta.id}" + + """ + baysor_preprocess_transcripts.py \\ + --transcripts ${transcripts} \\ + --prefix ${prefix} \\ + --min-qv ${min_qv} \\ + --min-x ${min_x} \\ + --max-x ${max_x} \\ + --min-y ${min_y} \\ + --max-y ${max_y} + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("BAYSOR_PREPROCESS_TRANSCRIPTS module does not support Conda. 
Please use Docker / Singularity / Podman instead.") + } + + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p ${prefix} + touch ${prefix}/filtered_transcripts.csv + """ +} diff --git a/modules/local/baysor/preprocess/meta.yml b/modules/local/baysor/preprocess/meta.yml new file mode 100644 index 00000000..0bff8178 --- /dev/null +++ b/modules/local/baysor/preprocess/meta.yml @@ -0,0 +1,88 @@ +name: "baysor_preprocess" +description: Filter transcript.parquet file based on the specified thresholds +keywords: + - baysor + - transcripts + - filter_transcripts +tools: + - "baysor": + description: "Baysor is a tool that segments cells using spatial gene expression maps. Optionally, segmentation masks can be given as additional input." + homepage: "https://kharchenkolab.github.io/Baysor/dev/" + documentation: "https://kharchenkolab.github.io/Baysor/dev/" + tool_dev_url: "https://github.com/kharchenkolab/Baysor" + doi: "https://doi.org/10.1038/s41587-021-01044-w" + licence: ["MIT license"] + identifier: + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - transcripts: + type: file + description: transcripts.parquet file from the xenium bundle + pattern: "*.parquet" + + - min_qv: + type: float + description: minimum Q-Score to pass filtering (default - 20.0) + - max_x: + type: float + description: Only keep transcripts whose x-coordinate is less than specified limit + if no limit is specified, the default value will retain all + transcripts since Xenium slide is <24000 microns in x and y (default - 24000.0) + - min_x: + type: float + description: only keep transcripts whose x-coordinate is greater than specified limit + if no limit is specified, the default minimum value will be 0.0 + - max_y: + type: float + description: only keep transcripts whose y-coordinate is less than specified limit + if no limit is specified, the default value will retain all + transcripts since Xenium slide is <24000 microns in x and y (default - 24000.0) + - min_y: + type: float + description: only keep transcripts whose y-coordinate is greater than specified limit + if no limit is specified, the default minimum value will be 0.0 + +output: + transcripts_file: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - "*.csv": + type: file + description: filtered transcripts CSV (for Baysor 0.7.1 Parquet.jl compatibility) + pattern: "filtered_transcripts.csv" + + versions_python: + - - ${task.process}: + type: string + description: The process the versions were collected from + - python: + type: string + description: The tool name + - "python3 --version | sed 's/Python //'": + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - python: + type: string + description: The tool name + - "python3 --version | sed 's/Python //'": + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@khersameesh24" +maintainers: + - "@khersameesh24" diff --git a/modules/local/baysor/preprocess/tests/main.nf.test b/modules/local/baysor/preprocess/tests/main.nf.test new file mode 100644 index 00000000..f3112c25 --- /dev/null +++ b/modules/local/baysor/preprocess/tests/main.nf.test @@ -0,0 +1,68 @@ +nextflow_process { + + name "Test Process BAYSOR PREPROCESS TRANSCRIPTS" + script "../main.nf" + process "BAYSOR_PREPROCESS_TRANSCRIPTS" + config "./nextflow.config" + + tag "modules" + tag "modules_local" + tag "baysor" + tag "baysor/preprocess" + + test("baysor preprocess transcripts - transcripts.parquet") { + + when { + process { + """ + input[0] = channel.of([ + [id: "test_run_baysor"], + file("https://raw.githubusercontent.com/khersameesh24/test-datasets/baysor/transcripts.parquet", checkIfExists: true) + ]) + input[1] = 20 + input[2] = 24000.0 + input[3] = 0.0 + input[4] = 24000.0 + input[5] = 0.0 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert file(process.out.transcripts_file[0][1]).exists() }, + { assert file(process.out.transcripts_file[0][1]).name == "filtered_transcripts.csv" }, + { assert snapshot(process.out.versions_python).match("versions") } + ) + } + } + + 
test("baysor preprocess transcripts stub") { + + options "-stub" + + when { + process { + """ + input[0] = channel.of([ + [id: "test_run_baysor"], + file("https://raw.githubusercontent.com/khersameesh24/test-datasets/baysor/transcripts.parquet", checkIfExists: true) + ]) + input[1] = 20 + input[2] = 24000.0 + input[3] = 0.0 + input[4] = 24000.0 + input[5] = 0.0 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions_python).match("versions_stub") } + ) + } + } +} diff --git a/modules/local/baysor/preprocess/tests/main.nf.test.snap b/modules/local/baysor/preprocess/tests/main.nf.test.snap new file mode 100644 index 00000000..1baeceb1 --- /dev/null +++ b/modules/local/baysor/preprocess/tests/main.nf.test.snap @@ -0,0 +1,34 @@ +{ + "versions_stub": { + "content": [ + [ + [ + "BAYSOR_PREPROCESS_TRANSCRIPTS", + "python", + "3.14.4" + ] + ] + ], + "timestamp": "2026-04-29T18:32:45.74436808", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + }, + "versions": { + "content": [ + [ + [ + "BAYSOR_PREPROCESS_TRANSCRIPTS", + "python", + "3.14.4" + ] + ] + ], + "timestamp": "2026-04-29T18:32:39.447114547", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + } +} \ No newline at end of file diff --git a/modules/local/baysor/preprocess/tests/nextflow.config b/modules/local/baysor/preprocess/tests/nextflow.config new file mode 100644 index 00000000..f8b3a30a --- /dev/null +++ b/modules/local/baysor/preprocess/tests/nextflow.config @@ -0,0 +1,9 @@ +process { + + resourceLimits = [ + cpus: 4, + memory: '8.GB', + time: '2.h', + ] + +} diff --git a/modules/local/baysor/preview/main.nf b/modules/local/baysor/preview/main.nf new file mode 100644 index 00000000..b47bf43c --- /dev/null +++ b/modules/local/baysor/preview/main.nf @@ -0,0 +1,50 @@ +process BAYSOR_PREVIEW { + tag "${meta.id}" + label 'process_medium' + + container "khersameesh24/baysor:0.7.1" + + input: + tuple val(meta), path(transcripts), 
path(config) + + output: + tuple val(meta), path("${prefix}/preview.html"), emit: preview_html + tuple val("${task.process}"), val('baysor'), eval("baysor --version 2>&1 | grep -oP '\\d+\\.\\d+\\.\\d+' || echo unknown"), topic: versions, emit: versions_baysor + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("BAYSOR_PREVIEW module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + def args = task.ext.args ?: '' // extra CLI options; appended as the final continued line of the baysor command + prefix = task.ext.prefix ?: "${meta.id}" + + """ + export JULIA_NUM_THREADS=${task.cpus} + + mkdir -p ${prefix} + + baysor preview \\ + ${transcripts} \\ + --config ${config} \\ + --output ${prefix}/preview.html \\ + ${args} + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("BAYSOR_PREVIEW module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p ${prefix} + touch ${prefix}/preview.html + """ +} diff --git a/modules/local/baysor/preview/meta.yml b/modules/local/baysor/preview/meta.yml new file mode 100644 index 00000000..f23914e7 --- /dev/null +++ b/modules/local/baysor/preview/meta.yml @@ -0,0 +1,72 @@ +name: "baysor_preview" +description: Preview run for visualization of data. +keywords: + - exploratory-data-analysis +tools: + - "baysor": + description: "Baysor is a tool that segments cells using spatial gene expression maps. Optionally, segmentation masks can be given as additional input."
+ homepage: "https://kharchenkolab.github.io/Baysor/dev/" + documentation: "https://kharchenkolab.github.io/Baysor/dev/" + tool_dev_url: "https://github.com/kharchenkolab/Baysor" + doi: "https://doi.org/10.1038/s41587-021-01044-w" + licence: ["MIT license"] + identifier: + +## baysor_preview requires a transcript map of the data and a configuration file with argument values +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - transcripts_csv: + type: file + description: CSV file + pattern: "*.csv" + + - config_toml: + type: file + description: TOML file with config arguments + pattern: "*.toml" + +## segmentation results +output: + preview_html: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "preview.html": + type: file + description: segmentation preview + pattern: "preview.html" + versions_baysor: + - - ${task.process}: + type: string + description: The process the versions were collected from + - baysor: # must match the tool name emitted by main.nf: val('baysor') + type: string + description: The tool name + - "baysor --version 2>&1 | grep -oP '\\d+\\.\\d+\\.\\d+' || echo unknown": + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - baysor: + type: string + description: The tool name + - "baysor --version 2>&1 | grep -oP '\\d+\\.\\d+\\.\\d+' || echo unknown": + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@sebgoti8" + - "@khersameesh24" +maintainers: + - "@sebgoti8" + - "@khersameesh24" diff --git a/modules/local/baysor/preview/tests/main.nf.test b/modules/local/baysor/preview/tests/main.nf.test new file mode 100644 index 00000000..d3f522d7 --- /dev/null +++ b/modules/local/baysor/preview/tests/main.nf.test @@ -0,0 +1,60 @@ +nextflow_process { + + name "Test Process BAYSOR PREVIEW" + script "../main.nf"
+ process "BAYSOR_PREVIEW" + + tag "modules" + tag "modules_local" + tag "baysor" + tag "baysor/preview" + tag "preview" + + test("baysor preview - transcripts.parquet") { + + when { + process { + """ + input[0] = channel.of([ + [id: "test_run_baysor"], + file("https://raw.githubusercontent.com/khersameesh24/test-datasets/baysor/transcripts.parquet", checkIfExists: true), + file("https://raw.githubusercontent.com/khersameesh24/test-datasets/baysor/config/xenium.toml", checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert file(process.out.preview_html[0][1]).exists() }, + { assert file(process.out.preview_html[0][1]).name == "preview.html" }, + { assert snapshot(process.out.versions_baysor).match("versions") } + ) + } + } + + test("baysor preview stub") { + + options "-stub" + + when { + process { + """ + input[0] = channel.of([ + [id: "test_run_baysor"], + file("https://raw.githubusercontent.com/khersameesh24/test-datasets/baysor/transcripts.parquet", checkIfExists: true), + file("https://raw.githubusercontent.com/khersameesh24/test-datasets/baysor/config/xenium.toml", checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions_baysor).match("versions_stub") } + ) + } + } +} diff --git a/modules/local/baysor/preview/tests/main.nf.test.snap b/modules/local/baysor/preview/tests/main.nf.test.snap new file mode 100644 index 00000000..08ec9503 --- /dev/null +++ b/modules/local/baysor/preview/tests/main.nf.test.snap @@ -0,0 +1,34 @@ +{ + "versions_stub": { + "content": [ + [ + [ + "BAYSOR_PREVIEW", + "baysor", + "0.7.1" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-03-21T03:58:47.202659888" + }, + "versions": { + "content": [ + [ + [ + "BAYSOR_PREVIEW", + "baysor", + "0.7.1" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-03-21T03:58:35.006511807" + 
} +} \ No newline at end of file diff --git a/modules/local/baysor/preview/tests/nextflow.config b/modules/local/baysor/preview/tests/nextflow.config new file mode 100644 index 00000000..f8b3a30a --- /dev/null +++ b/modules/local/baysor/preview/tests/nextflow.config @@ -0,0 +1,9 @@ +process { + + resourceLimits = [ + cpus: 4, + memory: '8.GB', + time: '2.h', + ] + +} diff --git a/modules/local/baysor/run/main.nf b/modules/local/baysor/run/main.nf new file mode 100644 index 00000000..53cdfeaa --- /dev/null +++ b/modules/local/baysor/run/main.nf @@ -0,0 +1,67 @@ +process BAYSOR_RUN { + tag "${meta.id}" + label 'process_high' + + container "khersameesh24/baysor:0.7.1" + + input: + tuple val(meta), path(transcripts), path(prior_segmentation), path(config), val(scale) + + output: + tuple val(meta), path("${prefix}/segmentation.csv"), path("${prefix}/segmentation_polygons_2d.json"), emit: segmentation + tuple val("${task.process}"), val('baysor'), eval("baysor --version 2>&1 | grep -oP '\\d+\\.\\d+\\.\\d+' || echo unknown"), topic: versions, emit: versions_baysor + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("BAYSOR_RUN module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + def args = task.ext.args ?: '' + // Column-based prior (e.g. :cell_id) takes precedence over file-based prior + def prior_col = task.ext.prior_column ? ":${task.ext.prior_column}" : '' + def prior_seg = prior_col ?: (prior_segmentation ? prior_segmentation : '') + def confidence = task.ext.prior_confidence != null ? "--prior-segmentation-confidence=${task.ext.prior_confidence}" : '' + def scaling_factor = scale ? "--scale=${scale}" : '' + def config_arg = config ? 
"--config=${config}" : '' + prefix = task.ext.prefix ?: "${meta.id}" + + // Build command parts, filtering out empty strings + def cmd_parts = [ + "baysor run", + "${transcripts}", + prior_seg, + scaling_factor, + confidence, + "--output=\"${prefix}/segmentation.csv\"", + config_arg, + "--plot", + "--polygon-format=GeometryCollectionLegacy", + args + ].findAll { cmd -> cmd } + + """ + export JULIA_NUM_THREADS=${task.cpus} + + mkdir -p ${prefix} + + ${cmd_parts.join(' \\\n ')} + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("BAYSOR_RUN module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p ${prefix} + touch "${prefix}/segmentation.csv" + touch "${prefix}/segmentation_polygons_2d.json" + """ +} diff --git a/modules/local/baysor/run/meta.yml b/modules/local/baysor/run/meta.yml new file mode 100644 index 00000000..1fbdbbaa --- /dev/null +++ b/modules/local/baysor/run/meta.yml @@ -0,0 +1,106 @@ +name: "baysor_run" +description: Bayesian segmentation of spatial transcriptomics data. +keywords: + - segmentation + - spatial transcriptomics + - cell clustering + - imaging +tools: + - "baysor": + description: "Baysor is a tool that segments cells using spatial gene expression maps. Optionally, segmentation masks can be given as additional input." + homepage: "https://kharchenkolab.github.io/Baysor/dev/" + documentation: "https://kharchenkolab.github.io/Baysor/dev/" + tool_dev_url: "https://github.com/kharchenkolab/Baysor" + doi: "https://doi.org/10.1038/s41587-021-01044-w" + licence: ["MIT license"] + identifier: + +## Baysor requires a transcript map of the data and a configuration file with argument values +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - transcripts_csv: + type: file + description: CSV file + pattern: "*.csv" + + - config_toml: + type: file + description: TOML file with config arguments + pattern: "*.toml" + +## segmentation results +output: + - segmentation: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*segmentation.csv": + type: file + description: results of segmentation + pattern: "segmentation.csv" + + - polygons: + type: file + description: | + File with outlines of segmentation + pattern: "*.json" + + - params: + type: file + description: | + File with full list of parameters used for the model + pattern: "segmentation_params.dump.toml" + + - log: + type: file + description: | + Output file with metadata of running the workflow + pattern: "segmentation_log.log" + + - loom: + type: file + description: | + Loom file with metadata + pattern: "segmentation_counts.loom" + + - stats: + type: file + description: | + Statistics of segmented cells + pattern: "segmentation_cell_stats.csv" + + - versions_baysor: + - - ${task.process}: + type: string + description: The process the versions were collected from + - baysor: # must match the tool name emitted by main.nf: val('baysor') + type: string + description: The tool name + - "baysor --version 2>&1 | grep -oP '\\d+\\.\\d+\\.\\d+' || echo unknown": + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - baysor: + type: string + description: The tool name + - "baysor --version 2>&1 | grep -oP '\\d+\\.\\d+\\.\\d+' || echo unknown": + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@sebgoti8" + - "@khersameesh24" +maintainers: + - "@sebgoti8" + - "@khersameesh24" diff --git a/modules/local/baysor/run/tests/main.nf.test b/modules/local/baysor/run/tests/main.nf.test new file mode 100644 index 00000000..37cda127 --- /dev/null +++
b/modules/local/baysor/run/tests/main.nf.test @@ -0,0 +1,67 @@ +nextflow_process { + + name "Test Process BAYSOR RUN" + script "../main.nf" + process "BAYSOR_RUN" + + tag "modules" + tag "modules_local" + tag "baysor" + tag "baysor/run" + tag "segmentation" + tag "cell_segmentation" + + test("baysor run - transcripts.parquet") { + + when { + process { + """ + input[0] = channel.of([ + [id: "test_run_baysor"], + file("https://raw.githubusercontent.com/khersameesh24/test-datasets/baysor/transcripts.parquet", checkIfExists: true), + [], + file("https://raw.githubusercontent.com/khersameesh24/test-datasets/baysor/config/xenium.toml", checkIfExists: true), + 30 + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert file(process.out.segmentation[0][1]).exists() }, + { assert file(process.out.segmentation[0][1]).name == "segmentation.csv" }, + { assert file(process.out.segmentation[0][2]).exists() }, + { assert file(process.out.segmentation[0][2]).name == "segmentation_polygons_2d.json" }, + { assert snapshot(process.out.versions_baysor).match("versions") } + ) + } + } + + test("baysor run stub") { + + options "-stub" + + when { + process { + """ + input[0] = channel.of([ + [id: "test_run_baysor"], + file("https://raw.githubusercontent.com/khersameesh24/test-datasets/baysor/transcripts.parquet", checkIfExists: true), + [], + file("https://raw.githubusercontent.com/khersameesh24/test-datasets/baysor/config/xenium.toml", checkIfExists: true), + 30 + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions_baysor).match("versions_stub") } + ) + } + } +} diff --git a/modules/local/baysor/run/tests/main.nf.test.snap b/modules/local/baysor/run/tests/main.nf.test.snap new file mode 100644 index 00000000..73d72688 --- /dev/null +++ b/modules/local/baysor/run/tests/main.nf.test.snap @@ -0,0 +1,34 @@ +{ + "versions_stub": { + "content": [ + [ + [ + "BAYSOR_RUN", + "baysor", + "0.7.1" + ] + ] + ], + 
"meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-03-21T03:59:56.882483481" + }, + "versions": { + "content": [ + [ + [ + "BAYSOR_RUN", + "baysor", + "0.7.1" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-03-21T03:59:44.964721046" + } +} \ No newline at end of file diff --git a/modules/local/baysor/segfree/main.nf b/modules/local/baysor/segfree/main.nf new file mode 100644 index 00000000..eb41fa87 --- /dev/null +++ b/modules/local/baysor/segfree/main.nf @@ -0,0 +1,50 @@ +process BAYSOR_SEGFREE { + tag "${meta.id}" + label 'process_high' + + container "khersameesh24/baysor:0.7.1" + + input: + tuple val(meta), path(transcripts), path(config) + + output: + tuple val(meta), path("${prefix}/ncvs.loom"), emit: ncvs + tuple val("${task.process}"), val('baysor'), eval("baysor --version 2>&1 | grep -oP '\\d+\\.\\d+\\.\\d+' || echo unknown"), topic: versions, emit: versions_baysor + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("BAYSOR_SEGFREE module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + """ + export JULIA_NUM_THREADS=${task.cpus} + + mkdir -p ${prefix} + + baysor segfree \\ + ${transcripts} \\ + --config ${config} \\ + --output=${prefix}/ncvs.loom \\ + ${args} + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("BAYSOR_SEGFREE module does not support Conda. 
Please use Docker / Singularity / Podman instead.") + } + + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p ${prefix} + touch "${prefix}/ncvs.loom" + """ +} diff --git a/modules/local/baysor/segfree/meta.yml b/modules/local/baysor/segfree/meta.yml new file mode 100644 index 00000000..e8ccdad2 --- /dev/null +++ b/modules/local/baysor/segfree/meta.yml @@ -0,0 +1,75 @@ +name: "baysor_segfree" +description: Extract neighborhood composition vectors (NCVs) from a dataset. +keywords: + - neighborhood + - baysor + - segmentation_free +tools: + - "baysor": + description: "Baysor is a tool that segments cells using spatial gene expression maps. Optionally, segmentation masks can be given as additional input." + homepage: "https://kharchenkolab.github.io/Baysor/dev/" + documentation: "https://kharchenkolab.github.io/Baysor/dev/" + tool_dev_url: "https://github.com/kharchenkolab/Baysor" + doi: "https://doi.org/10.1038/s41587-021-01044-w" + licence: ["MIT license"] + identifier: + +## baysor_segfree requires a transcript map of the data and a configuration file with argument values +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test' ] + - "${prefix}/ncvs.loom": + type: file + description: | + Neighborhood composition vectors (NCVs) in loom format + pattern: "${prefix}/ncvs.loom" + + versions_baysor: + - - ${task.process}: + type: string + description: The process the versions were collected from + - baysor: # must match the tool name emitted by main.nf: val('baysor') + type: string + description: The tool name + - "baysor --version 2>&1 | grep -oP '\\d+\\.\\d+\\.\\d+' || echo unknown": + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - baysor: + type: string + description: The tool name + - "baysor --version 2>&1 | grep -oP '\\d+\\.\\d+\\.\\d+' || echo unknown": + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@sebgoti8" + - "@khersameesh24" +maintainers: + - "@sebgoti8" + - "@khersameesh24" diff --git a/modules/local/baysor/segfree/tests/main.nf.test b/modules/local/baysor/segfree/tests/main.nf.test new file mode 100644 index 00000000..1fb7d2bc --- /dev/null +++ b/modules/local/baysor/segfree/tests/main.nf.test @@ -0,0 +1,60 @@ +nextflow_process { + + name "Test Process BAYSOR SEGFREE" + script "../main.nf" + process "BAYSOR_SEGFREE" + + tag "modules" + tag "modules_local" + tag "baysor" + tag "baysor/segfree" + tag "segmentation-free" + + test("baysor segfree - transcripts.parquet") { + + when { + process { + """ + input[0] = channel.of([ + [id: "test_run_baysor"], + file("https://raw.githubusercontent.com/khersameesh24/test-datasets/baysor/transcripts.parquet", checkIfExists: true), + file("https://raw.githubusercontent.com/khersameesh24/test-datasets/baysor/config/xenium.toml", checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert file(process.out.ncvs[0][1]).exists() }, + { assert file(process.out.ncvs[0][1]).name == "ncvs.loom" }, + { assert snapshot(process.out.versions_baysor).match("versions") } + ) + } + } + +
test("baysor segfree stub") { + + options "-stub" + + when { + process { + """ + input[0] = channel.of([ + [id: "test_run_baysor"], + file("https://raw.githubusercontent.com/khersameesh24/test-datasets/baysor/transcripts.parquet", checkIfExists: true), + file("https://raw.githubusercontent.com/khersameesh24/test-datasets/baysor/config/xenium.toml", checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions_baysor).match("versions_stub") } + ) + } + } +} diff --git a/modules/local/baysor/segfree/tests/main.nf.test.snap b/modules/local/baysor/segfree/tests/main.nf.test.snap new file mode 100644 index 00000000..075e13ea --- /dev/null +++ b/modules/local/baysor/segfree/tests/main.nf.test.snap @@ -0,0 +1,34 @@ +{ + "versions_stub": { + "content": [ + [ + [ + "BAYSOR_SEGFREE", + "baysor", + "0.7.1" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-03-21T04:00:54.745153684" + }, + "versions": { + "content": [ + [ + [ + "BAYSOR_SEGFREE", + "baysor", + "0.7.1" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-03-21T04:00:42.542466144" + } +} \ No newline at end of file diff --git a/modules/local/ficture/model/main.nf b/modules/local/ficture/model/main.nf new file mode 100644 index 00000000..34c2dbc0 --- /dev/null +++ b/modules/local/ficture/model/main.nf @@ -0,0 +1,43 @@ +process FICTURE { + tag "$meta.id" + label 'process_high' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/08/08f94799a8abd47d274654c49ed5ae225811b8a64bc9788739f4c5d23fa08230/data' : + 'community.wave.seqera.io/library/pip_ficture:ad8a1265a51b53cf' }" + + input: + tuple val(meta), path(transcripts) + path(coordinate_minmax) + path(features) + + output: + tuple val(meta), path("results/**"), emit: results + tuple val("${task.process}"), val('ficture'), eval("pip show ficture | sed -n 's/^Version: //p'"), topic: versions, emit: versions_ficture + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def features_list = features ? "--in-feature ${features}": "" + + """ + ficture run_together \\ + --in-tsv ${transcripts} \\ + --in-minmax ${coordinate_minmax} \\ + ${features_list} \\ + --out-dir results \\ + --train-width 12,18 \\ + --n-factor 6,12 \\ + --n-jobs ${task.cpus} \\ + --plot-each-factor \\ + --all \\ + ${args} + """ + + stub: + """ + mkdir -p results/ + """ +} diff --git a/modules/local/ficture/model/meta.yml b/modules/local/ficture/model/meta.yml new file mode 100644 index 00000000..c957639e --- /dev/null +++ b/modules/local/ficture/model/meta.yml @@ -0,0 +1,72 @@ +name: ficture +description: FICTURE is a software tool that performs segmentation-free analysis of submicron-resolution analysis of spatial transcriptomics data. +keywords: + - spatial + - segmentation_free + - imaging + - ficture +tools: + - ficture: + description: | + FICTURE is a software tool that performs segmentation-free analysis of submicron-resolution + analysis of spatial transcriptomics data. This tool executes ficture itself. + homepage: "https://seqscope.github.io/ficture/" + documentation: "https://seqscope.github.io/ficture/" + tool_dev_url: "https://github.com/seqscope/ficture" + licence: + - "Attribution-NonCommercial 4.0 International" + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing run information + e.g. 
[id:'sample'] + - transcripts: + type: file + description: One file contains the molecular or pixel level information, the required columns are X, Y, gene, and Count. (There could be other columns in the file which would be ignored.) + - - coordinate_minmax: + type: file + description: We also prefer to keep a file listing the min and max of the coordinates (this is primarily for visualizing very big tissue region where we do not read all data at once but would want to know the image dimension). The unit of the coordinates is micrometer. + - - features: + type: file + description: Another file contains the (unique) names of genes that should be used in analysis. The required columns is just gene (including the header), the naming of genes should match the gene column in the transcript file. If your data contain negative control probes or if you would like to remove certain genes this is where you can specify. +output: + results: + - - meta: + type: map + description: | + Groovy Map containing run information + e.g. 
[id:'sample'] + - results: + type: file + description: Files containing the results of FICTURE + pattern: "results/**" + versions_ficture: + - - ${task.process}: + type: string + description: The process the versions were collected from + - ficture: # must match the tool name emitted by main.nf: val('ficture') + type: string + description: The tool name + - "pip show ficture | sed -n 's/^Version: //p'": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - ficture: + type: string + description: The tool name + - "pip show ficture | sed -n 's/^Version: //p'": + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@khersameesh24" + - "@heylf" +maintainers: + - "@khersameesh24" + - "@heylf" diff --git a/modules/local/ficture/preprocess/main.nf b/modules/local/ficture/preprocess/main.nf new file mode 100644 index 00000000..30b103a3 --- /dev/null +++ b/modules/local/ficture/preprocess/main.nf @@ -0,0 +1,38 @@ +process FICTURE_PREPROCESS { + tag "$meta.id" + label 'process_high' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/08/08f94799a8abd47d274654c49ed5ae225811b8a64bc9788739f4c5d23fa08230/data' : + 'community.wave.seqera.io/library/pip_ficture:ad8a1265a51b53cf' }" + + input: + tuple val(meta), path(transcripts) + path(features) + + output: + tuple val(meta), path("*processed_transcripts.tsv.gz"), emit: transcripts + path("*coordinate_minmax.tsv") , emit: coordinate_minmax + path("*feature.clean.tsv.gz") , optional:true, emit: features + tuple val("${task.process}"), val('ficture'), eval("pip show ficture | sed -n 's/^Version: //p'"), topic: versions, emit: versions_ficture + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def features_arg = features ?
"--features ${features}" : "" + + """ + ficture_preprocess.py \\ + --transcripts ${transcripts} \\ + ${features_arg} \\ + ${args} + """ + + stub: + """ + touch processed_transcripts.tsv.gz + touch coordinate_minmax.tsv + """ +} diff --git a/modules/local/ficture/preprocess/meta.yml b/modules/local/ficture/preprocess/meta.yml new file mode 100644 index 00000000..4147cc90 --- /dev/null +++ b/modules/local/ficture/preprocess/meta.yml @@ -0,0 +1,80 @@ +name: ficture_preprocessing +description: FICTURE is a software tool that performs segmentation-free analysis of submicron-resolution analysis of spatial transcriptomics data. +keywords: + - spatial + - segmentation_free + - imaging + - preprocessing + - ficture +tools: + - ficture: + description: | + FICTURE is a software tool that performs segmentation-free analysis of submicron-resolution + analysis of spatial transcriptomics data. This tool executes ficture itself. + homepage: "https://seqscope.github.io/ficture/" + documentation: "https://seqscope.github.io/ficture/" + tool_dev_url: "https://github.com/seqscope/ficture" + licence: + - "Attribution-NonCommercial 4.0 International" + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing run information + e.g. [id:'sample'] + - transcripts: + type: file + description: One file contains the molecular or pixel level information, the required columns are X, Y, gene, and Count. (There could be other columns in the file which would be ignored.) + - - features: + type: file + description: Another txt file that contains the (unique) names of genes that should be used in analysis. If your data contain negative control probes or if you would like to remove certain genes this is where you can specify. Just list the genes. One gene per line. No header. +output: + transcripts: + - - meta: + type: map + description: | + Groovy Map containing run information + e.g. 
[id:'sample'] + - "*processed_transcripts.tsv.gz": + type: file + description: Transcript file used for FICTURE + pattern: "processed_transcripts.tsv.gz" + coordinate_minmax: + - "*coordinate_minmax.tsv": + type: file + description: Listing the min and max of the coordinates used for FICTURE + pattern: "coordinate_minmax.tsv" + features: + - "*feature.clean.tsv.gz": + type: file + description: Another file contains the (unique) names of genes that should be used for FICTURE + pattern: "feature.clean.tsv.gz" + versions_ficture: + - - ${task.process}: + type: string + description: The process the versions were collected from + - ficture: + type: string + description: The tool name + - "pip show ficture | sed -n 's/^Version: //p'": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - ficture: + type: string + description: The tool name + - "pip show ficture | sed -n 's/^Version: //p'": + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@khersameesh24" + - "@heylf" +maintainers: + - "@khersameesh24" + - "@heylf" diff --git a/modules/local/parquet_to_csv/main.nf b/modules/local/parquet_to_csv/main.nf new file mode 100644 index 00000000..26c613e3 --- /dev/null +++ b/modules/local/parquet_to_csv/main.nf @@ -0,0 +1,33 @@ +process PARQUET_TO_CSV { + tag "$meta.id" + label 'process_low' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f9/f9c8f3a2de4e2aa94500011f7d7d09276e9b6f2d79ee8737c9098fe22d4649bc/data' : + 'community.wave.seqera.io/library/sopa_procps-ng_pyarrow:c9ce8cd2ede79d72' }" + + input: + tuple val(meta), path(parquet) + + output: + tuple val(meta), path("transcripts.csv"), emit: csv + tuple val("${task.process}"), val('pyarrow'), eval("python3 -c 'import pyarrow; print(pyarrow.__version__)'"), topic: versions, emit: versions_pyarrow + + when: + task.ext.when == null || task.ext.when + + script: + """ + python3 -c " +import pyarrow.parquet as pq +import pyarrow.csv as pa_csv +t = pq.read_table('${parquet}') +pa_csv.write_csv(t, 'transcripts.csv') +" + """ + + stub: + """ + touch transcripts.csv + """ +} diff --git a/modules/local/parquet_to_csv/meta.yml b/modules/local/parquet_to_csv/meta.yml new file mode 100644 index 00000000..c3ab2506 --- /dev/null +++ b/modules/local/parquet_to_csv/meta.yml @@ -0,0 +1,70 @@ +name: "parquet_to_csv" +description: Convert parquet file to CSV for tools with old Parquet readers. +keywords: + - xenium + - parquet + - csv + - transcripts +tools: + - "pyarrow": + description: | + Python library providing a Pythonic API for Apache Arrow, + including fast Parquet and CSV I/O. + homepage: "https://arrow.apache.org/docs/python/" + documentation: "https://arrow.apache.org/docs/python/" + tool_dev_url: "https://github.com/apache/arrow" + doi: "no DOI available" + licence: ["Apache-2.0"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - parquet: + type: file + description: Parquet file to convert. + pattern: "*.parquet" + ontologies: [] + +output: + csv: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "transcripts.csv": + type: file + description: Converted CSV file. 
+ pattern: "transcripts.csv" + ontologies: [] + versions_pyarrow: + - - ${task.process}: + type: string + description: The process the versions were collected from + - pyarrow: + type: string + description: The tool name + - "python3 -c 'import pyarrow; print(pyarrow.__version__)'": + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - pyarrow: + type: string + description: The tool name + - "python3 -c 'import pyarrow; print(pyarrow.__version__)'": + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@an-altosian" +maintainers: + - "@an-altosian" diff --git a/modules/local/proseg/preset/main.nf b/modules/local/proseg/preset/main.nf new file mode 100644 index 00000000..553801ba --- /dev/null +++ b/modules/local/proseg/preset/main.nf @@ -0,0 +1,65 @@ +process PROSEG { + tag "${meta.id}" + label 'process_high' + + container "ghcr.io/dcjones/proseg:v3.1.0" + + input: + tuple val(meta), path(transcripts) + + output: + tuple val(meta), path("${prefix}/cell-polygons.geojson.gz"), path("${prefix}/transcript-metadata.csv.gz"), emit: seg_outs + tuple val(meta), path("${prefix}/proseg-output.zarr"), emit: zarr + tuple val("${task.process}"), val('proseg'), eval("proseg --version | sed 's/proseg //'"), topic: versions, emit: versions_proseg + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("PROSEG module does not support Conda. 
Please use Docker / Singularity / Podman instead.") + } + + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p ${prefix} + + proseg \\ + ${args} \\ + ${transcripts} \\ + --nthreads ${task.cpus} \\ + --output-expected-counts ${prefix}/expected-counts.csv.gz \\ + --output-cell-metadata ${prefix}/cell-metadata.csv.gz \\ + --output-transcript-metadata ${prefix}/transcript-metadata.csv.gz \\ + --output-gene-metadata ${prefix}/gene-metadata.csv.gz \\ + --output-rates ${prefix}/rates.csv.gz \\ + --output-cell-polygons ${prefix}/cell-polygons.geojson.gz \\ + --output-cell-polygon-layers ${prefix}/cell-polygons-layers.geojson.gz \\ + --output-union-cell-polygons ${prefix}/union-cell-polygons.geojson.gz \\ + --output-spatialdata ${prefix}/proseg-output.zarr + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("PROSEG module does not support Conda. 
Please use Docker / Singularity / Podman instead.") + } + + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p ${prefix}/ + touch "${prefix}/expected-counts.csv.gz" + touch "${prefix}/cell-metadata.csv.gz" + touch "${prefix}/transcript-metadata.csv.gz" + touch "${prefix}/gene-metadata.csv.gz" + touch "${prefix}/rates.csv.gz" + touch "${prefix}/cell-polygons.geojson.gz" + touch "${prefix}/cell-polygons-layers.geojson.gz" + touch "${prefix}/union-cell-polygons.geojson.gz" + mkdir -p "${prefix}/proseg-output.zarr" + """ +} diff --git a/modules/local/proseg/preset/meta.yml b/modules/local/proseg/preset/meta.yml new file mode 100644 index 00000000..524b5ca0 --- /dev/null +++ b/modules/local/proseg/preset/meta.yml @@ -0,0 +1,76 @@ +name: "proseg" +description: Probabilistic cell segmentation for in situ spatial transcriptomics +keywords: + - segmentation + - cell segmentation + - spatialomics + - probabilistic segmentation + - in situ spatial transcriptomics +tools: + - "proseg": + description: "Proseg (probabilistic segmentation) is a cell segmentation method for in situ spatial transcriptomics. Xenium, CosMx, and MERSCOPE platforms are currently supported." + homepage: "https://github.com/dcjones/proseg/tree/main" + documentation: "https://github.com/dcjones/proseg/blob/main/README.md" + tool_dev_url: "https://github.com/dcjones/proseg" + doi: "" + licence: ["GNU Public License"] + +input: + - - meta: + type: map + description: | + Groovy Map containing run information + e.g. `[ id:'run_id']` + - transcripts: + type: file + description: | + File containing the transcript position + pattern: "transcripts.csv.gz" + +output: + - - meta: + type: map + description: | + Groovy Map containing run information + e.g. `[ id:'run_id']` + - cell_polygons: + type: file + description: 2D polygons for each cell in GeoJSON format. 
These are flattened from 3D + pattern: "cell-polygons.geojson.gz" + - - expected_counts: + type: file + description: cell-by-gene count matrix + pattern: "expected-counts.csv.gz" + - - cell_metadata: + type: file + description: Cell centroids, volume, and other information + pattern: "cell-metadata.csv.gz" + - - transcript_metadata: + type: file + description: Transcript ids, genes, revised positions, assignment probability + pattern: "transcript-metadata.csv.gz" + - - gene_metadata: + type: file + description: Per-gene summary statistics + pattern: "gene-metadata.csv.gz" + - - rates: + type: file + description: Cell-by-gene Poisson rate parameters + pattern: "rates.csv.gz" + - - cell_polygon_layers: + type: file + description: A separate, non-overlapping cell polygon for each z-layer, preserving 3D segmentation + pattern: "cell-polygons-layers.geojson.gz" + - - cell_hulls: + type: file + description: Convex hulls around assigned transcripts + pattern: "cell-hulls.geojson.gz" + - - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@khersameesh24" +maintainers: + - "@khersameesh24" diff --git a/modules/local/proseg/preset/tests/main.nf.test b/modules/local/proseg/preset/tests/main.nf.test new file mode 100644 index 00000000..baa48c10 --- /dev/null +++ b/modules/local/proseg/preset/tests/main.nf.test @@ -0,0 +1,61 @@ +nextflow_process { + + name "Test Process PROSEG" + script "../main.nf" + process "PROSEG" + + tag "modules" + tag "modules_nfcore" + tag "proseg" + tag "proseg/proseg" + tag "segmentation" + tag "cell_segmentation" + + test("proseg - transcripts.csv") { + + when { + process { + """ + input[0] = [ + [id: "test_run_proseg"], + file(params.modules_testdata_base_path + "spatial_omics/xenium/homo_sapiens/spatial_gene_expression.csv", checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert file(process.out.seg_outs[0][1]).exists() }, + { assert 
file(process.out.seg_outs[0][1]).name == "cell-polygons.geojson.gz" }, + { assert file(process.out.seg_outs[0][2]).exists() }, + { assert file(process.out.seg_outs[0][2]).name == "transcript-metadata.csv.gz" }, + { assert snapshot(process.out.versions_proseg).match("versions") } + ) + } + } + + test("proseg stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [id: "test_run_proseg"], + file(params.modules_testdata_base_path + "spatial_omics/xenium/homo_sapiens/spatial_gene_expression.csv", checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions_proseg).match("versions_stub") } + ) + } + } +} diff --git a/modules/local/proseg/preset/tests/main.nf.test.snap b/modules/local/proseg/preset/tests/main.nf.test.snap new file mode 100644 index 00000000..944325aa --- /dev/null +++ b/modules/local/proseg/preset/tests/main.nf.test.snap @@ -0,0 +1,34 @@ +{ + "versions_stub": { + "content": [ + [ + [ + "PROSEG", + "proseg", + "3.1.0" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-03-21T04:01:19.610456233" + }, + "versions": { + "content": [ + [ + [ + "PROSEG", + "proseg", + "3.1.0" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-03-21T04:01:12.114004958" + } +} \ No newline at end of file diff --git a/modules/local/proseg/preset/tests/nextflow.config b/modules/local/proseg/preset/tests/nextflow.config new file mode 100644 index 00000000..f8b3a30a --- /dev/null +++ b/modules/local/proseg/preset/tests/nextflow.config @@ -0,0 +1,9 @@ +process { + + resourceLimits = [ + cpus: 4, + memory: '8.GB', + time: '2.h', + ] + +} diff --git a/modules/local/proseg/proseg2baysor/main.nf b/modules/local/proseg/proseg2baysor/main.nf new file mode 100644 index 00000000..1a0c8b38 --- /dev/null +++ b/modules/local/proseg/proseg2baysor/main.nf @@ -0,0 +1,47 @@ +process PROSEG2BAYSOR { + tag "$meta.id" + 
label 'process_high' + + container "ghcr.io/dcjones/proseg:v3.1.0" + + input: + tuple val(meta), path(zarr_dir) + + output: + tuple val(meta), path("${prefix}/cell-polygons.geojson") , emit: xr_polygons + tuple val(meta), path("${prefix}/transcript-metadata.csv"), emit: xr_metadata + tuple val("${task.process}"), val('proseg'), eval("proseg --version | sed 's/proseg //'"), topic: versions, emit: versions_proseg + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "PROSEG2BAYSOR module does not support Conda. Please use Docker / Singularity / Podman instead." + } + + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p ${prefix} + + proseg-to-baysor \\ + ${zarr_dir} \\ + --output-transcript-metadata ${prefix}/transcript-metadata.csv \\ + --output-cell-polygons ${prefix}/cell-polygons.geojson \\ + ${args} + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "PROSEG2BAYSOR module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p ${prefix} + touch "${prefix}/transcript-metadata.csv" + touch "${prefix}/cell-polygons.geojson" + """ +} diff --git a/modules/local/proseg/proseg2baysor/meta.yml b/modules/local/proseg/proseg2baysor/meta.yml new file mode 100644 index 00000000..3d5cfac4 --- /dev/null +++ b/modules/local/proseg/proseg2baysor/meta.yml @@ -0,0 +1,80 @@ +name: "proseg2baysor" +description: Probabilistic cell segmentation for in situ spatial transcriptomics +keywords: + - segmentation + - cell segmentation + - spatialomics + - probabilistic segmentation + - in situ spatial transcriptomics +tools: + - "proseg": + description: "Proseg (probabilistic segmentation) is a cell segmentation method for in situ spatial transcriptomics. Xenium, CosMx, and MERSCOPE platforms are currently supported." + homepage: "https://github.com/dcjones/proseg/tree/main" + documentation: "https://github.com/dcjones/proseg/blob/main/README.md" + tool_dev_url: "https://github.com/dcjones/proseg" + doi: "" + licence: ["GNU Public License"] + +input: + - - meta: + type: map + description: | + Groovy Map containing run information + e.g. `[ id:'run_id']` + - cell_polygons: + type: file + description: | + Cell polygons output file from the proseg xenium (format) run + pattern: "cell-polygons.geojson.gz" + - - transcript_metadata: + type: file + description: | + Transcript metadata file output file from the proseg xenium (format) run + pattern: "transcript-metadata.csv.gz" +output: + xr_polygons: + - - meta: + type: map + description: | + Groovy Map containing run information + e.g. `[ id:'run_id']` + - "${prefix}/cell-polygons.geojson": + type: file + description: 2D polygons for each cell in GeoJSON format. These are flattened from 3D + pattern: "xr-cell-polygons.geojson" + xr_metadata: + - - meta: + type: map + description: | + Groovy Map containing run information + e.g. 
`[ id:'run_id']` + - "${prefix}/transcript-metadata.csv": + type: file + description: Transcript ids, genes, revised positions, assignment probability + pattern: "transcript-metadata.csv" + versions_proseg: + - - ${task.process}: + type: string + description: The process the versions were collected from + - proseg: + type: string + description: The tool name + - "proseg --version | sed 's/proseg //'": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - proseg: + type: string + description: The tool name + - "proseg --version | sed 's/proseg //'": + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@khersameesh24" +maintainers: + - "@khersameesh24" diff --git a/modules/local/proseg/proseg2baysor/tests/main.nf.test b/modules/local/proseg/proseg2baysor/tests/main.nf.test new file mode 100644 index 00000000..039217e0 --- /dev/null +++ b/modules/local/proseg/proseg2baysor/tests/main.nf.test @@ -0,0 +1,72 @@ +nextflow_process { + + name "Test Process PROSEG2BAYSOR" + script "../main.nf" + process "PROSEG2BAYSOR" + + tag "modules" + tag "modules_nfcore" + tag "proseg" + tag "segmentation" + tag "cell_segmentation" + + + setup { + run("PROSEG") { + script "modules/local/proseg/preset/main.nf" + process { + """ + input[0] = [ + [id: "test_run_proseg"], + file(params.modules_testdata_base_path + "spatial_omics/xenium/homo_sapiens/spatial_gene_expression.csv", checkIfExists: true) + ] + """ + } + } + } + + test("proseg2baysor - cell_polygons, transcript_metadata") { + + when { + process { + """ + input[0] = channel.of([ + [id: "test_run_proseg2baysor"], + ]).combine(PROSEG.out.seg_outs, by: 0) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("proseg2baysor stub") { + + options "-stub" + + when { + process { +
""" + input[0] = channel.of([ + [id: "test_run_proseg2baysor"], + ]).combine(PROSEG.out.seg_outs, by: 0) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/local/proseg/proseg2baysor/tests/main.nf.test.snap b/modules/local/proseg/proseg2baysor/tests/main.nf.test.snap new file mode 100644 index 00000000..7dff8302 --- /dev/null +++ b/modules/local/proseg/proseg2baysor/tests/main.nf.test.snap @@ -0,0 +1,60 @@ +{ + "proseg2baysor - cell_polygons, transcript_metadata": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + + ], + "versions_proseg": [ + + ], + "xr_metadata": [ + + ], + "xr_polygons": [ + + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-03-21T03:50:54.118409704" + }, + "proseg2baysor stub": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + + ], + "versions_proseg": [ + + ], + "xr_metadata": [ + + ], + "xr_polygons": [ + + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-03-21T03:51:01.546798675" + } +} \ No newline at end of file diff --git a/modules/local/proseg/proseg2baysor/tests/nextflow.config b/modules/local/proseg/proseg2baysor/tests/nextflow.config new file mode 100644 index 00000000..f8b3a30a --- /dev/null +++ b/modules/local/proseg/proseg2baysor/tests/nextflow.config @@ -0,0 +1,9 @@ +process { + + resourceLimits = [ + cpus: 4, + memory: '8.GB', + time: '2.h', + ] + +} diff --git a/modules/local/resolift/main.nf b/modules/local/resolift/main.nf new file mode 100644 index 00000000..f5f22d09 --- /dev/null +++ b/modules/local/resolift/main.nf @@ -0,0 +1,46 @@ +process RESOLIFT { + tag "${meta.id}" + label 'process_low' + + container "khersameesh24/resolift:1.0.0" + + input: + tuple val(meta), path(morphology_tiff) + + output: + tuple val(meta), path("${prefix}/morphology.ome.enhanced.tiff"), emit: enhanced_tiff + tuple 
val("${task.process}"), val('resolift'), eval("pip show resolift | sed -n 's/^Version: //p'"), topic: versions, emit: versions_resolift + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("RESOLIFT module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p ${prefix} + + resolift \\ + -i ${morphology_tiff} \\ + -o ${prefix}/morphology.ome.enhanced.tiff \\ + ${args} + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("RESOLIFT module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p ${prefix} + touch "${prefix}/morphology.ome.enhanced.tiff" + """ +} diff --git a/modules/local/resolift/meta.yml b/modules/local/resolift/meta.yml new file mode 100644 index 00000000..c6f3fbb0 --- /dev/null +++ b/modules/local/resolift/meta.yml @@ -0,0 +1,64 @@ +name: "resolift" +description: Upscale large TIFFs using chunk-based image processing and sharpening. +keywords: + - segmentation + - spatialomics + - image based segmentation +tools: + - "resolift": + description: "Upscale large TIFFs using chunk-based image processing and sharpening." + homepage: "https://github.com/khersameesh24/ResoLift" + documentation: "https://github.com/khersameesh24/ResoLift" + tool_dev_url: "https://github.com/khersameesh24/ResoLift" + doi: "" + licence: ["GNU Public License"] + +input: + - - meta: + type: map + description: | + Groovy Map containing run information + e.g. `[ id:'run_id']` + - input: + type: file + description: | + Path to the input TIFF file. 
+ pattern: "*.tiff" + +output: + enhanced_tiff: + - - meta: + type: map + description: | + Groovy Map containing run information + e.g. `[ id:'run_id']` + - "${prefix}/morphology.ome.enhanced.tiff": + type: file + description: Path to save the upscaled TIFF file. + pattern: "*.tiff" + versions_resolift: + - - ${task.process}: + type: string + description: The process the versions were collected from + - resolift: + type: string + description: The tool name + - "pip show resolift | sed -n 's/^Version: //p'": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - resolift: + type: string + description: The tool name + - "pip show resolift | sed -n 's/^Version: //p'": + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@kubranarci" +maintainers: + - "@kubranarci" diff --git a/modules/local/resolift/tests/main.nf.test b/modules/local/resolift/tests/main.nf.test new file mode 100644 index 00000000..7130d733 --- /dev/null +++ b/modules/local/resolift/tests/main.nf.test @@ -0,0 +1,60 @@ +nextflow_process { + + name "Test Process RESOLIFT" + script "../main.nf" + process "RESOLIFT" + config "./nextflow.config" + + tag "modules" + tag "modules_local" + tag "resolift" + tag "segmentation" + tag "cell_segmentation" + + test("resolift tif") { + + when { + process { + """ + input[0] = [ + [ id:'test_run' ], + file(params.modules_testdata_base_path + 'imaging/segmentation/nuclear_image.tif', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + + } + + test("resolift stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test_run' ], + file(params.modules_testdata_base_path + 'imaging/segmentation/nuclear_image.tif', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + {
assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/local/resolift/tests/main.nf.test.snap b/modules/local/resolift/tests/main.nf.test.snap new file mode 100644 index 00000000..8972d043 --- /dev/null +++ b/modules/local/resolift/tests/main.nf.test.snap @@ -0,0 +1,84 @@ +{ + "resolift stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_run" + }, + "morphology.ome.enhanced.tiff:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "RESOLIFT", + "resolift", + "1.0" + ] + ], + "enhanced_tiff": [ + [ + { + "id": "test_run" + }, + "morphology.ome.enhanced.tiff:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_resolift": [ + [ + "RESOLIFT", + "resolift", + "1.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-03-22T16:00:00.000000000" + }, + "resolift tif": { + "content": [ + { + "0": [ + [ + { + "id": "test_run" + }, + "morphology.ome.enhanced.tiff:md5,a9feec67b317d8e5c5c93bc45d5a8763" + ] + ], + "1": [ + [ + "RESOLIFT", + "resolift", + "1.0" + ] + ], + "enhanced_tiff": [ + [ + { + "id": "test_run" + }, + "morphology.ome.enhanced.tiff:md5,a9feec67b317d8e5c5c93bc45d5a8763" + ] + ], + "versions_resolift": [ + [ + "RESOLIFT", + "resolift", + "1.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-03-22T16:00:00.000000000" + } +} \ No newline at end of file diff --git a/modules/local/resolift/tests/nextflow.config b/modules/local/resolift/tests/nextflow.config new file mode 100644 index 00000000..f8b3a30a --- /dev/null +++ b/modules/local/resolift/tests/nextflow.config @@ -0,0 +1,9 @@ +process { + + resourceLimits = [ + cpus: 4, + memory: '8.GB', + time: '2.h', + ] + +} diff --git a/modules/local/segger/Dockerfile b/modules/local/segger/Dockerfile new file mode 100644 index 00000000..e907e350 --- /dev/null +++ b/modules/local/segger/Dockerfile @@ -0,0 +1,52 @@ +# GPU image for segger 
segmentation +# Base: PyTorch with CUDA 12.4 (consistent CUDA support) +FROM pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime + +# Install system dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + git \ + procps \ + build-essential \ + && rm -rf /var/lib/apt/lists/* + +# Upgrade pip +RUN pip install --no-cache-dir --upgrade pip + +# Clone and install segger from fork with CUDA 12 support +# Using fork with fixed API (is_token_based, num_node_features) and data module exports +WORKDIR /workspace +RUN git clone https://github.com/an-altosian/segger_dev.git /workspace/segger_dev && \ + cd /workspace/segger_dev && \ + pip install --no-cache-dir -e ".[cuda12]" + +# Install missing dependencies for data.io module and training +RUN pip install --no-cache-dir dask-geopandas lightning pytorch_lightning + +# Fix squidpy/anndata compatibility - upgrade squidpy to version compatible with anndata 0.10+ +RUN pip install --no-cache-dir "squidpy>=1.4.0" + +# Make NVRTC discoverable for CuPy (needs libnvrtc.so.12 for JIT kernel compilation) +# The nvidia-cuda-nvrtc-cu12 pip package installs it under site-packages. +# Use both ldconfig AND symlinks for maximum reliability (ldconfig may not survive +# Wave container augmentation on Seqera Platform). 
+RUN NVRTC_LIB=$(python -c "import nvidia.cuda_nvrtc, pathlib; print(pathlib.Path(nvidia.cuda_nvrtc.__file__).parent / 'lib')") && \ + echo "$NVRTC_LIB" > /etc/ld.so.conf.d/nvidia-nvrtc.conf && \ + ln -sf "$NVRTC_LIB"/libnvrtc* /usr/lib/ && \ + ldconfig && \ + echo "NVRTC registered at: $NVRTC_LIB" && \ + ldconfig -p | grep nvrtc + +# Install faiss-cpu for vector search (faiss-gpu not available via pip) +# Note: Removed cuvs-cu12 and nvidia library force-reinstalls as they break PyTorch CUDA detection +RUN pip install --no-cache-dir faiss-cpu + +# Verify PyTorch was built with CUDA support (doesn't require GPU at build time) +RUN python -c "import torch; print(f'PyTorch version: {torch.__version__}'); print(f'CUDA compiled: {torch.version.cuda}')" + +# Set environment variables for segger +ENV PYTHONPATH=/workspace/segger_dev/src:$PYTHONPATH +ENV PATH="$PATH:/workspace/segger_dev/src/segger/cli/" +ENV CUPY_CACHE_DIR="/tmp/cupy_cache" + +# Set default shell +CMD ["/bin/bash"] diff --git a/modules/local/segger/create_dataset/main.nf b/modules/local/segger/create_dataset/main.nf new file mode 100644 index 00000000..d7a31c11 --- /dev/null +++ b/modules/local/segger/create_dataset/main.nf @@ -0,0 +1,49 @@ +process SEGGER_CREATE_DATASET { + tag "${meta.id}" + label 'process_xl' + + container "quay.io/dongzehe/segger:1.0.14" + + input: + tuple val(meta), path(base_dir) + + output: + tuple val(meta), path("${prefix}/"), emit: datasetdir + tuple val("${task.process}"), val('segger'), eval("pip show segger | sed -n 's/^Version: //p'"), topic: versions, emit: versions_segger + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("SEGGER_CREATE_DATASET module does not support Conda. 
Please use Docker / Singularity / Podman instead.") + } + + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + """ + export NUMBA_CACHE_DIR=\$PWD/.numba_cache + mkdir -p \$NUMBA_CACHE_DIR + + segger_create_dataset.py \\ + --bundle-dir ${base_dir} \\ + --output-dir ${prefix} \\ + --n-workers ${task.cpus} \\ + ${args} + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("SEGGER_CREATE_DATASET module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p ${prefix}/ + touch "${prefix}/fake_file.txt" + """ +} diff --git a/modules/local/segger/create_dataset/meta.yml b/modules/local/segger/create_dataset/meta.yml new file mode 100644 index 00000000..1f948299 --- /dev/null +++ b/modules/local/segger/create_dataset/meta.yml @@ -0,0 +1,63 @@ +name: "segger_create_dataset" +description: Create a segger compatible dataset to speed up training and prediction of cell segmentations. +keywords: + - segmentation + - xenium + - imaging +tools: + - "segger": + description: "Segger uses graph neural networks and heterogeneous graphs to offer efficient cell segmentation at unmatched precision and accuracy." + homepage: "https://github.com/EliHei2/segger_dev" + documentation: "https://elihei2.github.io/segger_dev/user_guide/" + tool_dev_url: "https://github.com/EliHei2/segger_dev" + doi: "tbd" + licence: ["MIT"] + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'segger_run_id' ]` + - base_dir: + type: file + description: | + Directory containing the raw dataset (e.g., transcripts, boundaries). +output: + datasetdir: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'segger_run_id' ]` + - "${prefix}/": + description: | + Directory to save the processed Segger dataset (in PyTorch Geometric format). + versions_segger: + - - ${task.process}: + type: string + description: The process the versions were collected from + - python: + type: string + description: The tool name + - "pip show segger | sed -n 's/^Version: //p'": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - python: + type: string + description: The tool name + - "pip show segger | sed -n 's/^Version: //p'": + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@tobiaspk" + - "@khersameesh24" +maintainers: + - "@tobiaspk" + - "@khersameesh24" diff --git a/modules/local/segger/predict/main.nf b/modules/local/segger/predict/main.nf new file mode 100644 index 00000000..f7be984e --- /dev/null +++ b/modules/local/segger/predict/main.nf @@ -0,0 +1,51 @@ +process SEGGER_PREDICT { + tag "${meta.id}" + label 'process_xl' + label 'process_gpu' + + container "quay.io/dongzehe/segger:1.0.14" + + input: + tuple val(meta), path(segger_dataset) + path models_dir + path transcripts + + output: + tuple val(meta), path("benchmarks_dir"), emit: benchmarks + tuple val(meta), path("benchmarks_dir/*/segger_transcripts.parquet"), emit: transcripts + tuple val("${task.process}"), val('segger'), eval("pip show segger | sed -n 's/^Version: //p'"), topic: versions, emit: versions_segger + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("SEGGER_PREDICT module does not support Conda. 
Please use Docker / Singularity / Podman instead.") + } + + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + segger_predict.py \\ + --models-dir ${models_dir} \\ + --segger-data-dir ${segger_dataset} \\ + --transcripts-file ${transcripts} \\ + --benchmarks-dir benchmarks_dir \\ + --num-workers ${task.cpus} \\ + ${args} + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("SEGGER_PREDICT module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p "benchmarks_dir" + touch "benchmarks_dir/fake_file.txt" + """ +} diff --git a/modules/local/segger/predict/meta.yml b/modules/local/segger/predict/meta.yml new file mode 100644 index 00000000..777f768a --- /dev/null +++ b/modules/local/segger/predict/meta.yml @@ -0,0 +1,86 @@ +name: "segger_predict" +description: Predict cell segmentation on new data using a trained Segger model. +keywords: + - segmentation + - xenium + - imaging + - prediction +tools: + - "segger": + description: "Segger uses graph neural networks and heterogeneous graphs to offer efficient cell segmentation at unmatched precision and accuracy." + homepage: "https://github.com/EliHei2/segger_dev" + documentation: "https://elihei2.github.io/segger_dev/user_guide/" + tool_dev_url: "https://github.com/EliHei2/segger_dev" + doi: "tbd" + licence: ["MIT"] + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'segger_run_id' ]` + - segger_dataset: + type: file + description: | + File or directory with input data to run predictions on. + - - models_dir: + type: directory + description: | + Directory containing the trained model for inference. + - - transcripts: + type: file + description: | + Path to the transcripts.parquet file.
+ - - benchmarks_dir: + type: directory + description: | + Directory to save the prediction outputs. +output: + benchmarks: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'segger_run_id' ]` + - "benchmarks_dir": + description: | + Directory to save the segmentation results, including cell boundaries and associations. + transcripts: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'segger_run_id' ]` + - "benchmarks_dir/*/segger_transcripts.parquet": + type: file + description: | + Path to the transcripts.parquet file. + versions_segger: + - - ${task.process}: + type: string + description: The process the versions were collected from + - segger: + type: string + description: The tool name + - "pip show segger | sed -n 's/^Version: //p'": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - segger: + type: string + description: The tool name + - "pip show segger | sed -n 's/^Version: //p'": + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@tobiaspk" + - "@khersameesh24" +maintainers: + - "@tobiaspk" + - "@khersameesh24" diff --git a/modules/local/segger/train/main.nf b/modules/local/segger/train/main.nf new file mode 100644 index 00000000..c441d186 --- /dev/null +++ b/modules/local/segger/train/main.nf @@ -0,0 +1,66 @@ +process SEGGER_TRAIN { + tag "${meta.id}" + label 'process_xl' + label 'process_gpu' + + container "quay.io/dongzehe/segger:1.0.14" + + input: + tuple val(meta), path(dataset_dir) + + output: + tuple val(meta), path("trained_models"), emit: trained_models + tuple val("${task.process}"), val('segger'), eval("pip show segger | sed -n 's/^Version: //p'"), topic: versions, emit: versions_segger + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if
running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("SEGGER_TRAIN module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + def args = task.ext.args ?: '' + def script_path = "/workspace/segger_dev/src/segger/cli/train_model.py" + prefix = task.ext.prefix ?: "${meta.id}" + def gpu_count = 2 * task.attempt + def cuda_visible = gpu_count == 1 ? "export CUDA_VISIBLE_DEVICES=0" : "" + def accelerator = task.accelerator ? 'gpu' : 'auto' + + """ + # Set numba cache directory to avoid caching issues in container + export NUMBA_CACHE_DIR=\$PWD/.numba_cache + mkdir -p \$NUMBA_CACHE_DIR + + # GPU detection logging + echo "=== GPU Detection (SEGGER_TRAIN) ===" + echo "Requested devices: ${gpu_count} (attempt ${task.attempt})" + echo "Accelerator: ${accelerator}" + nvidia-smi 2>/dev/null && echo "GPU available: yes" || echo "GPU available: no (nvidia-smi failed)" + python3 -c "import torch; print(f'PyTorch CUDA available: {torch.cuda.is_available()}'); print(f'CUDA device count: {torch.cuda.device_count()}')" 2>/dev/null || echo "PyTorch CUDA check failed" + echo "====================================" + + ${cuda_visible} + python3 ${script_path} \\ + --dataset_dir ${dataset_dir} \\ + --models_dir trained_models \\ + --sample_tag ${prefix} \\ + --devices ${gpu_count} \\ + --accelerator ${accelerator} \\ + ${args} + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("SEGGER_TRAIN module does not support Conda. 
Please use Docker / Singularity / Podman instead.") + } + + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p trained_models/ + touch trained_models/fakefile.txt + """ +} diff --git a/modules/local/segger/train/meta.yml b/modules/local/segger/train/meta.yml new file mode 100644 index 00000000..7127cdeb --- /dev/null +++ b/modules/local/segger/train/meta.yml @@ -0,0 +1,65 @@ +name: "segger_train" +description: Train a Segger cell segmentation model using a segger-created dataset. +keywords: + - segmentation + - xenium + - imaging + - model_training +tools: + - "segger": + description: "Segger uses graph neural networks and heterogeneous graphs to offer efficient cell segmentation at unmatched precision and accuracy." + homepage: "https://github.com/EliHei2/segger_dev" + documentation: "https://elihei2.github.io/segger_dev/user_guide/" + tool_dev_url: "https://github.com/EliHei2/segger_dev" + doi: "tbd" + licence: ["MIT"] + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'segger_run_id' ]` + - dataset_dir: + type: directory + description: | + Directory containing the segger-created dataset(s). +output: + trained_models: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'segger_run_id' ]` + - "trained_models": + type: directory + description: | + Directory to save the trained model and checkpoints.
+ versions_segger: + - - ${task.process}: + type: string + description: The process the versions were collected from + - segger: + type: string + description: The tool name + - "pip show segger | sed -n 's/^Version: //p'": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - segger: + type: string + description: The tool name + - "pip show segger | sed -n 's/^Version: //p'": + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@tobiaspk" + - "@khersameesh24" +maintainers: + - "@tobiaspk" + - "@khersameesh24" diff --git a/modules/local/spatialdata/merge/main.nf b/modules/local/spatialdata/merge/main.nf new file mode 100644 index 00000000..db614000 --- /dev/null +++ b/modules/local/spatialdata/merge/main.nf @@ -0,0 +1,48 @@ +process SPATIALDATA_MERGE { + tag "${meta.id}" + label 'process_high_memory' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/cb/cb8fc03fa657c164c5d83f075578bbb5d9c10f1178165f94e94f33c67efca1a1/data' : + 'community.wave.seqera.io/library/spatialdata-io_spatialdata:b264928c30680e87' }" + + input: + tuple val(meta), path(raw_bundle, stageAs: "*"), path(redefined_bundle, stageAs: "*") + val(outputfolder) + + output: + tuple val(meta), path("spatialdata/${prefix}/${outputfolder}"), emit: merged_bundle + tuple val("${task.process}"), val('spatialdata'), eval("pip show spatialdata | sed -n 's/^Version: //p'"), topic: versions, emit: versions_spatialdata + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("SPATIALDATA_MERGE module does not support Conda.
Please use Docker / Singularity / Podman instead.") + } + + prefix = task.ext.prefix ?: "${meta.id}" + + """ + spatialdata_merge.py \\ + --raw-bundle ${raw_bundle} \\ + --redefined-bundle ${redefined_bundle} \\ + --prefix ${prefix} \\ + --output-folder ${outputfolder} + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("SPATIALDATA_MERGE module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p "spatialdata/${prefix}/${outputfolder}/" + touch "spatialdata/${prefix}/${outputfolder}/fake_file.txt" + """ +} diff --git a/modules/local/spatialdata/merge/meta.yml b/modules/local/spatialdata/merge/meta.yml new file mode 100644 index 00000000..248d5bc2 --- /dev/null +++ b/modules/local/spatialdata/merge/meta.yml @@ -0,0 +1,44 @@ +name: spatialdata_merge +description: Merge several SpatialData bundles together. +keywords: + - data + - spatialomics +tools: + - custom: + description: An open and universal framework for processing spatial omics data + homepage: https://github.com/scverse/spatialdata + documentation: https://spatialdata.scverse.org/en/latest/ + licence: ["BSD 3-Clause"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample' ] + - raw_bundle: + type: folder + description: | + Path to reference (raw) spatialdata bundle. + - redefined_bundle: + type: folder + description: | + Path to the redefined spatialdata bundle that should be merged into the raw bundle. + +output: + - merged_bundle: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'sample' ] + - "spatialdata_spatialaxe": + type: folder + description: Spatialdata folder + pattern: "${meta.id}/spatialdata_spatialaxe/*" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@heylf" diff --git a/modules/local/spatialdata/meta/main.nf b/modules/local/spatialdata/meta/main.nf new file mode 100644 index 00000000..714bf797 --- /dev/null +++ b/modules/local/spatialdata/meta/main.nf @@ -0,0 +1,49 @@ +process SPATIALDATA_META { + tag "${meta.id}" + label 'process_high' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/cb/cb8fc03fa657c164c5d83f075578bbb5d9c10f1178165f94e94f33c67efca1a1/data' : + 'community.wave.seqera.io/library/spatialdata-io_spatialdata:b264928c30680e87' }" + + input: + tuple val(meta), path(spatialdata_bundle, stageAs: "*"), path(xenium_bundle, stageAs: "*") + val(outputfolder) + + output: + tuple val(meta), path("spatialdata/${prefix}/${outputfolder}"), emit: metadata + tuple val("${task.process}"), val('spatialdata'), eval("pip show spatialdata | sed -n 's/^Version: //p'"), topic: versions, emit: versions_spatialdata + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("SPATIALDATA_META module does not support Conda. 
Please use Docker / Singularity / Podman instead.") + } + + prefix = task.ext.prefix ?: "${meta.id}" + + """ + spatialdata_meta.py \\ + --spatialdata-bundle ${spatialdata_bundle} \\ + --xenium-bundle ${xenium_bundle} \\ + --prefix ${prefix} \\ + --metadata '${meta}' \\ + --output-folder ${outputfolder} + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("SPATIALDATA_META module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p "spatialdata/${prefix}/${outputfolder}/" + touch "spatialdata/${prefix}/${outputfolder}/fake_file.txt" + """ +} diff --git a/modules/local/spatialdata/meta/meta.yml b/modules/local/spatialdata/meta/meta.yml new file mode 100644 index 00000000..8450c932 --- /dev/null +++ b/modules/local/spatialdata/meta/meta.yml @@ -0,0 +1,44 @@ +name: spatialdata_meta +description: Add metadata to a SpatialData bundle. +keywords: + - data + - spatialomics +tools: + - custom: + description: An open and universal framework for processing spatial omics data + homepage: https://github.com/scverse/spatialdata + documentation: https://spatialdata.scverse.org/en/latest/ + licence: ["BSD 3-Clause"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample' ] + - spatialdata_bundle: + type: folder + description: | + Path to spatialdata bundle. + - xenium_bundle: + type: folder + description: | + Path to Xeniumranger bundle. + +output: + - spatialaxe_bundle: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'sample' ] + - "spatialdata_spatialaxe_final": + type: folder + description: Spatialdata folder + pattern: "${meta.id}/spatialdata_spatialaxe_final/*" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@heylf" diff --git a/modules/local/spatialdata/write/main.nf b/modules/local/spatialdata/write/main.nf new file mode 100644 index 00000000..6caed6c1 --- /dev/null +++ b/modules/local/spatialdata/write/main.nf @@ -0,0 +1,53 @@ +process SPATIALDATA_WRITE { + tag "${meta.id}" + label 'process_high' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/cb/cb8fc03fa657c164c5d83f075578bbb5d9c10f1178165f94e94f33c67efca1a1/data' : + 'community.wave.seqera.io/library/spatialdata-io_spatialdata:b264928c30680e87' }" + + input: + tuple val(meta), path(bundle, stageAs: "*") + val(outputfolder) + val(segmented_object) + val(coordinate_space) + + output: + tuple val(meta), path("spatialdata/${prefix}/${outputfolder}"), emit: spatialdata + tuple val("${task.process}"), val('spatialdata'), eval("pip show spatialdata | sed -n 's/^Version: //p'"), topic: versions, emit: versions_spatialdata + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("SPATIALDATA_WRITE module does not support Conda. 
Please use Docker / Singularity / Podman instead.") + } + + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + """ + spatialdata_write.py \\ + --bundle ${bundle} \\ + --prefix ${prefix} \\ + --output-folder ${outputfolder} \\ + --segmented-object ${segmented_object} \\ + --coordinate-space ${coordinate_space} \\ + ${args} + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("SPATIALDATA_WRITE module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p "spatialdata/${prefix}/${outputfolder}" + touch "spatialdata/${prefix}/${outputfolder}/fake_file.txt" + """ +} diff --git a/modules/local/spatialdata/write/meta.yml b/modules/local/spatialdata/write/meta.yml new file mode 100644 index 00000000..1c14653a --- /dev/null +++ b/modules/local/spatialdata/write/meta.yml @@ -0,0 +1,40 @@ +name: spatialdata_write +description: Convert 10x Xenium data bundle into a SpatialData bundle. +keywords: + - data + - spatialomics +tools: + - custom: + description: An open and universal framework for processing spatial omics data + homepage: https://github.com/scverse/spatialdata + documentation: https://spatialdata.scverse.org/en/latest/ + licence: ["BSD 3-Clause"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample' ] + - bundle: + type: folder + description: | + Path to spatialomics data bundle (e.g., from Xeniumranger) + +output: + - spatialdata: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'sample' ] + - "${outputfolder}": + type: folder + description: Spatialdata folder + pattern: "${meta.id}/spatialdata/*" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@heylf" diff --git a/modules/local/utility/convert_mask_uint32/main.nf b/modules/local/utility/convert_mask_uint32/main.nf new file mode 100644 index 00000000..40e5c35c --- /dev/null +++ b/modules/local/utility/convert_mask_uint32/main.nf @@ -0,0 +1,48 @@ +/* + * CONVERT_MASK_UINT32: Convert segmentation mask to uint32 dtype. + * + * XeniumRanger import-segmentation requires uint32 masks. + * StarDist outputs int32 labels by default. + * + * Input: + * - meta: Sample metadata map + * - mask: Segmentation mask TIFF (any integer dtype) + * + * Output: + * - mask: uint32 segmentation mask TIFF + * - versions: Software versions + */ +process CONVERT_MASK_UINT32 { + tag "${meta.id}" + label 'process_low' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/d9/d964e0bef867bb2ff1a309c9c087d8d83ac734ce3aa315dd8311d4c1bfdafd8e/data' : + 'community.wave.seqera.io/library/python_pip_imagecodecs_nvidia-cublas-cu12_pruned:b668bcb6d531d350' }" + + input: + tuple val(meta), path(mask) + + output: + tuple val(meta), path("${prefix}_uint32_mask.tif"), emit: mask + tuple val("${task.process}"), val('python'), eval("python3 --version | sed 's/Python //'"), topic: versions, emit: versions_python + tuple val("${task.process}"), val('tifffile'), eval("python3 -c 'import tifffile; print(tifffile.__version__)'"), topic: versions, emit: versions_tifffile + tuple val("${task.process}"), val('numpy'), eval("python3 -c 'import numpy; print(numpy.__version__)'"), topic: versions, emit: versions_numpy + + when: + task.ext.when == null || task.ext.when + + script: + prefix = task.ext.prefix ?: "${meta.id}" + """ + utility_convert_mask_uint32.py \\ + --input ${mask} \\ + --output ${prefix}_uint32_mask.tif + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_uint32_mask.tif + """ +} diff --git a/modules/local/utility/convert_mask_uint32/meta.yml b/modules/local/utility/convert_mask_uint32/meta.yml new file mode 100644 index 00000000..0d6f4ff0 --- /dev/null +++ b/modules/local/utility/convert_mask_uint32/meta.yml @@ -0,0 +1,128 @@ +name: "convert_mask_uint32" +description: Convert a segmentation mask TIFF to uint32 dtype for downstream tools that require uint32 (e.g. XeniumRanger import-segmentation). +keywords: + - xenium + - segmentation + - mask + - tiff + - uint32 +tools: + - "python": + description: | + Python programming language interpreter, used here to load and rewrite + the segmentation mask with the required dtype. 
+ homepage: "https://www.python.org/" + documentation: "https://docs.python.org/3/" + tool_dev_url: "https://github.com/python/cpython" + doi: "no DOI available" + licence: ["PSF-2.0"] + identifier: "" + - "tifffile": + description: | + Read and write TIFF files, including BigTIFF and OME-TIFF, in Python. + homepage: "https://github.com/cgohlke/tifffile" + documentation: "https://github.com/cgohlke/tifffile" + tool_dev_url: "https://github.com/cgohlke/tifffile" + doi: "no DOI available" + licence: ["BSD-3-Clause"] + identifier: "" + - "numpy": + description: | + The fundamental package for scientific computing with Python, + used to cast the mask array to uint32. + homepage: "https://numpy.org/" + documentation: "https://numpy.org/doc/stable/" + tool_dev_url: "https://github.com/numpy/numpy" + doi: "10.1038/s41586-020-2649-2" + licence: ["BSD-3-Clause"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - mask: + type: file + description: Segmentation mask TIFF (any integer dtype). + pattern: "*.{tif,tiff}" + ontologies: [] + +output: + mask: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*_uint32_mask.tif": + type: file + description: Segmentation mask TIFF cast to uint32. 
+ pattern: "*_uint32_mask.tif" + ontologies: [] + versions_python: + - - ${task.process}: + type: string + description: The process the versions were collected from + - python: + type: string + description: The tool name + - "python3 --version | sed 's/Python //'": + type: eval + description: The expression to obtain the version of the tool + versions_tifffile: + - - ${task.process}: + type: string + description: The process the versions were collected from + - tifffile: + type: string + description: The tool name + - "python3 -c 'import tifffile; print(tifffile.__version__)'": + type: eval + description: The expression to obtain the version of the tool + versions_numpy: + - - ${task.process}: + type: string + description: The process the versions were collected from + - numpy: + type: string + description: The tool name + - "python3 -c 'import numpy; print(numpy.__version__)'": + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - python: + type: string + description: The tool name + - "python3 --version | sed 's/Python //'": + type: eval + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: The process the versions were collected from + - tifffile: + type: string + description: The tool name + - "python3 -c 'import tifffile; print(tifffile.__version__)'": + type: eval + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: The process the versions were collected from + - numpy: + type: string + description: The tool name + - "python3 -c 'import numpy; print(numpy.__version__)'": + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@an-altosian" +maintainers: + - "@an-altosian" diff --git a/modules/local/utility/downscale_morphology/main.nf 
b/modules/local/utility/downscale_morphology/main.nf new file mode 100644 index 00000000..ef5143ef --- /dev/null +++ b/modules/local/utility/downscale_morphology/main.nf @@ -0,0 +1,58 @@ +/* + * DOWNSCALE_MORPHOLOGY: Pre-downscale morphology image for cellpose + * + * Reduces image dimensions by a scale factor so that cellpose's internal + * rescaling (diam_mean/diameter) does not exceed GPU/CPU memory. + * The scale factor defaults to diameter/diam_mean (e.g., 9/30 = 0.3). + * After downscaling, cellpose should use --diameter 30 (no internal rescale). + * + * Input: + * - meta: Sample metadata map + * - image: Morphology OME-TIFF + * + * Output: + * - downscaled: Downscaled TIFF image + * - scale_info: JSON with scale factor and original dimensions + * - versions: Software versions + */ +process DOWNSCALE_MORPHOLOGY { + tag "${meta.id}" + label 'process_medium' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/cb/cb670191b7ae1a9fd5449746453916c7014b9ea622942ca76a7cb40da7deee46/data' : + 'community.wave.seqera.io/library/python_pip_cellpose:fdf7a8c3a305a26e' }" + + input: + tuple val(meta), path(image) + + output: + tuple val(meta), path("${prefix}/downscaled.tif"), emit: downscaled + tuple val(meta), path("${prefix}/scale_info.json"), emit: scale_info + tuple val("${task.process}"), val('python'), eval("python3 --version | sed 's/Python //'"), topic: versions, emit: versions_python + tuple val("${task.process}"), val('tifffile'), eval("pip show tifffile 2>/dev/null | sed -n 's/^Version: //p'"), topic: versions, emit: versions_tifffile + tuple val("${task.process}"), val('scikit-image'), eval("pip show scikit-image 2>/dev/null | sed -n 's/^Version: //p'"), topic: versions, emit: versions_skimage + + when: + task.ext.when == null || task.ext.when + + script: + def diameter = task.ext.diameter ?: 9 + def diam_mean = 30 + prefix = 
task.ext.prefix ?: "${meta.id}" + """ + utility_downscale_morphology.py \\ + --image ${image} \\ + --diameter ${diameter} \\ + --diam-mean ${diam_mean} \\ + --prefix ${prefix} + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir -p ${prefix} + touch ${prefix}/downscaled.tif + echo '{"scale": 0.3}' > ${prefix}/scale_info.json + """ +} diff --git a/modules/local/utility/downscale_morphology/meta.yml b/modules/local/utility/downscale_morphology/meta.yml new file mode 100644 index 00000000..83408899 --- /dev/null +++ b/modules/local/utility/downscale_morphology/meta.yml @@ -0,0 +1,138 @@ +name: "downscale_morphology" +description: Pre-downscale a morphology image so that cellpose's internal rescaling does not exceed memory limits. +keywords: + - xenium + - morphology + - downscale + - image processing + - cellpose +tools: + - "python": + description: | + Python programming language interpreter. + homepage: "https://www.python.org/" + documentation: "https://docs.python.org/3/" + tool_dev_url: "https://github.com/python/cpython" + doi: "no DOI available" + licence: ["PSF-2.0"] + identifier: "" + - "tifffile": + description: | + Read and write TIFF files, including BigTIFF and OME-TIFF, in Python. + homepage: "https://github.com/cgohlke/tifffile" + documentation: "https://github.com/cgohlke/tifffile" + tool_dev_url: "https://github.com/cgohlke/tifffile" + doi: "no DOI available" + licence: ["BSD-3-Clause"] + identifier: "" + - "scikit-image": + description: | + Image processing library for Python (scikit-image), used here for + anti-aliased image resizing. + homepage: "https://scikit-image.org/" + documentation: "https://scikit-image.org/docs/stable/" + tool_dev_url: "https://github.com/scikit-image/scikit-image" + doi: "10.7717/peerj.453" + licence: ["BSD-3-Clause"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - image: + type: file + description: Morphology OME-TIFF image. + pattern: "*.{ome.tif,ome.tiff,tif,tiff}" + ontologies: [] + +output: + downscaled: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*/downscaled.tif": + type: file + description: Downscaled TIFF image. + pattern: "*/downscaled.tif" + ontologies: [] + scale_info: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*/scale_info.json": + type: file + description: JSON file with scale factor and original/new dimensions. + pattern: "*/scale_info.json" + ontologies: [] + versions_python: + - - ${task.process}: + type: string + description: The process the versions were collected from + - python: + type: string + description: The tool name + - "python3 --version | sed 's/Python //'": + type: eval + description: The expression to obtain the version of the tool + versions_tifffile: + - - ${task.process}: + type: string + description: The process the versions were collected from + - tifffile: + type: string + description: The tool name + - "pip show tifffile 2>/dev/null | sed -n 's/^Version: //p'": + type: eval + description: The expression to obtain the version of the tool + versions_skimage: + - - ${task.process}: + type: string + description: The process the versions were collected from + - scikit-image: + type: string + description: The tool name + - "pip show scikit-image 2>/dev/null | sed -n 's/^Version: //p'": + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - python: + type: string + description: The tool name + - "python3 --version | sed 's/Python //'": + type: eval + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: The process the versions were collected from
+ - tifffile: + type: string + description: The tool name + - "pip show tifffile 2>/dev/null | sed -n 's/^Version: //p'": + type: eval + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: The process the versions were collected from + - scikit-image: + type: string + description: The tool name + - "pip show scikit-image 2>/dev/null | sed -n 's/^Version: //p'": + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@an-altosian" +maintainers: + - "@an-altosian" diff --git a/modules/local/utility/extract_dapi/main.nf b/modules/local/utility/extract_dapi/main.nf new file mode 100644 index 00000000..ef9a88bd --- /dev/null +++ b/modules/local/utility/extract_dapi/main.nf @@ -0,0 +1,50 @@ +/* + * EXTRACT_DAPI: Extract DAPI channel (channel 0) from multi-channel OME-TIFF. + * + * Xenium morphology_focus.ome.tif has multiple channels (DAPI, boundary, interior); + * StarDist 2D_versatile_fluo expects single-channel input. + * + * Input: + * - meta: Sample metadata map + * - image: Multi-channel OME-TIFF morphology image + * + * Output: + * - dapi: Single-channel DAPI TIFF + * - versions: Software versions + */ +process EXTRACT_DAPI { + tag "${meta.id}" + label 'process_low' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/d9/d964e0bef867bb2ff1a309c9c087d8d83ac734ce3aa315dd8311d4c1bfdafd8e/data' : + 'community.wave.seqera.io/library/python_pip_imagecodecs_nvidia-cublas-cu12_pruned:b668bcb6d531d350' }" + + input: + tuple val(meta), path(image) + + output: + tuple val(meta), path("${prefix}_dapi.tif"), emit: dapi + tuple val("${task.process}"), val('python'), eval("python3 --version | sed 's/Python //'"), topic: versions, emit: versions_python + tuple val("${task.process}"), val('tifffile'), eval("python3 -c 'import tifffile; print(tifffile.__version__)'"), topic: versions, emit: versions_tifffile + tuple val("${task.process}"), val('numpy'), eval("python3 -c 'import numpy; print(numpy.__version__)'"), topic: versions, emit: versions_numpy + + when: + task.ext.when == null || task.ext.when + + script: + prefix = task.ext.prefix ?: "${meta.id}" + def channel_index = task.ext.channel_index ?: 0 + """ + utility_extract_dapi.py \\ + --input ${image} \\ + --output ${prefix}_dapi.tif \\ + --channel-index ${channel_index} + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_dapi.tif + """ +} diff --git a/modules/local/utility/extract_dapi/meta.yml b/modules/local/utility/extract_dapi/meta.yml new file mode 100644 index 00000000..11ce4342 --- /dev/null +++ b/modules/local/utility/extract_dapi/meta.yml @@ -0,0 +1,126 @@ +name: "extract_dapi" +description: Extract a single channel (default DAPI / channel 0) from a multi-channel OME-TIFF morphology image. +keywords: + - xenium + - dapi + - morphology + - ome-tiff + - image processing +tools: + - "python": + description: | + Python programming language interpreter. 
+ homepage: "https://www.python.org/" + documentation: "https://docs.python.org/3/" + tool_dev_url: "https://github.com/python/cpython" + doi: "no DOI available" + licence: ["PSF-2.0"] + identifier: "" + - "tifffile": + description: | + Read and write TIFF files, including BigTIFF and OME-TIFF, in Python. + homepage: "https://github.com/cgohlke/tifffile" + documentation: "https://github.com/cgohlke/tifffile" + tool_dev_url: "https://github.com/cgohlke/tifffile" + doi: "no DOI available" + licence: ["BSD-3-Clause"] + identifier: "" + - "numpy": + description: | + The fundamental package for scientific computing with Python. + homepage: "https://numpy.org/" + documentation: "https://numpy.org/doc/stable/" + tool_dev_url: "https://github.com/numpy/numpy" + doi: "10.1038/s41586-020-2649-2" + licence: ["BSD-3-Clause"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - image: + type: file + description: Multi-channel OME-TIFF morphology image. + pattern: "*.{ome.tif,ome.tiff,tif,tiff}" + ontologies: [] + +output: + dapi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*_dapi.tif": + type: file + description: Single-channel TIFF containing the extracted channel. 
+ pattern: "*_dapi.tif" + ontologies: [] + versions_python: + - - ${task.process}: + type: string + description: The process the versions were collected from + - python: + type: string + description: The tool name + - "python3 --version | sed 's/Python //'": + type: eval + description: The expression to obtain the version of the tool + versions_tifffile: + - - ${task.process}: + type: string + description: The process the versions were collected from + - tifffile: + type: string + description: The tool name + - "python3 -c 'import tifffile; print(tifffile.__version__)'": + type: eval + description: The expression to obtain the version of the tool + versions_numpy: + - - ${task.process}: + type: string + description: The process the versions were collected from + - numpy: + type: string + description: The tool name + - "python3 -c 'import numpy; print(numpy.__version__)'": + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - python: + type: string + description: The tool name + - "python3 --version | sed 's/Python //'": + type: eval + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: The process the versions were collected from + - tifffile: + type: string + description: The tool name + - "python3 -c 'import tifffile; print(tifffile.__version__)'": + type: eval + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: The process the versions were collected from + - numpy: + type: string + description: The tool name + - "python3 -c 'import numpy; print(numpy.__version__)'": + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@an-altosian" +maintainers: + - "@an-altosian" diff --git a/modules/local/utility/extract_preview_data/main.nf 
b/modules/local/utility/extract_preview_data/main.nf new file mode 100644 index 00000000..821effc5 --- /dev/null +++ b/modules/local/utility/extract_preview_data/main.nf @@ -0,0 +1,49 @@ +process EXTRACT_PREVIEW_DATA { + tag "${meta.id}" + label 'process_low' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c6/c6ebf365fbfd7bdde9e1453d646f45c39eddde92df5922b9881785f347bdbc2b/data' : + 'community.wave.seqera.io/library/beautifulsoup4_pandas:a3f88f59088edad5' }" + + input: + tuple val(meta), path(preview_html) + + output: + tuple val(meta), path("${prefix}/*_mqc.tsv"), emit: mqc_data + tuple val(meta), path("${prefix}/*_mqc.png"), emit: mqc_img + tuple val("${task.process}"), val('python'), eval("python3 --version | sed 's/Python //'"), topic: versions, emit: versions_python + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("EXTRACT_PREVIEW_DATA module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + prefix = task.ext.prefix ?: "${meta.id}" + + """ + utility_extract_preview_data.py \\ + --preview-html ${preview_html} \\ + --prefix ${prefix} + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("EXTRACT_PREVIEW_DATA module does not support Conda. 
Please use Docker / Singularity / Podman instead.") + } + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p ${prefix} + touch ${prefix}/noise_distribution_mqc.tsv + touch ${prefix}/gene_structure_mqc.tsv + touch ${prefix}/umap_mqc.tsv + touch ${prefix}/transcript_plots_mqc.png + touch ${prefix}/noise_level_mqc.png + """ +} diff --git a/modules/local/utility/extract_preview_data/meta.yml b/modules/local/utility/extract_preview_data/meta.yml new file mode 100644 index 00000000..d1b448e0 --- /dev/null +++ b/modules/local/utility/extract_preview_data/meta.yml @@ -0,0 +1,99 @@ +name: "extract_preview_data" +description: Parse a Xenium preview HTML report and extract MultiQC-compatible TSV tables and embedded plot images. +keywords: + - xenium + - preview + - multiqc + - html + - parsing +tools: + - "python": + description: | + Python programming language interpreter. + homepage: "https://www.python.org/" + documentation: "https://docs.python.org/3/" + tool_dev_url: "https://github.com/python/cpython" + doi: "no DOI available" + licence: ["PSF-2.0"] + identifier: "" + - "beautifulsoup4": + description: | + Python library for pulling data out of HTML and XML files. + homepage: "https://www.crummy.com/software/BeautifulSoup/" + documentation: "https://www.crummy.com/software/BeautifulSoup/bs4/doc/" + tool_dev_url: "https://git.launchpad.net/beautifulsoup" + doi: "no DOI available" + licence: ["MIT"] + identifier: "" + - "pandas": + description: | + Powerful data structures for data analysis, time series, and statistics. + homepage: "https://pandas.pydata.org/" + documentation: "https://pandas.pydata.org/docs/" + tool_dev_url: "https://github.com/pandas-dev/pandas" + doi: "10.5281/zenodo.3509134" + licence: ["BSD-3-Clause"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - preview_html: + type: file + description: Xenium preview HTML report to extract data from. 
+ pattern: "*.html" + ontologies: [] + +output: + mqc_data: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "${prefix}/*_mqc.tsv": + type: file + description: MultiQC-compatible TSV tables extracted from the preview HTML. + pattern: "*_mqc.tsv" + ontologies: [] + mqc_img: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "${prefix}/*_mqc.png": + type: file + description: MultiQC-compatible PNG plot images extracted from the preview HTML. + pattern: "*_mqc.png" + ontologies: [] + versions_python: + - - ${task.process}: + type: string + description: The process the versions were collected from + - python: + type: string + description: The tool name + - "python3 --version | sed 's/Python //'": + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - python: + type: string + description: The tool name + - "python3 --version | sed 's/Python //'": + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@khersameesh24" +maintainers: + - "@khersameesh24" diff --git a/modules/local/utility/get_coordinates/main.nf b/modules/local/utility/get_coordinates/main.nf new file mode 100644 index 00000000..2b672239 --- /dev/null +++ b/modules/local/utility/get_coordinates/main.nf @@ -0,0 +1,42 @@ +process GET_TRANSCRIPTS_COORDINATES { + tag "${meta.id}" + label 'process_low' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/94/9409ce399922a5746bea1b7df5668c3d1d79b9af49a15950d9818c4fe45ac749/data' : + 'community.wave.seqera.io/library/pandas_procs_pyarrow:d8f882b65dfea451' }" + + input: + tuple val(meta), path(transcripts) + + output: + stdout() + tuple val("${task.process}"), val('python'), eval("python3 --version | sed 's/Python //'"), topic: versions, emit: versions_python + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("GET_TRANSCRIPTS_COORDINATES module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + prefix = task.ext.prefix ?: "${meta.id}" + + """ + utility_get_coordinates.py \\ + --transcripts ${transcripts} + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("GET_TRANSCRIPTS_COORDINATES module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + prefix = task.ext.prefix ?: "${meta.id}" + + """ + echo "0,0,1000,1000" + """ +} diff --git a/modules/local/utility/get_coordinates/meta.yml b/modules/local/utility/get_coordinates/meta.yml new file mode 100644 index 00000000..cf631eda --- /dev/null +++ b/modules/local/utility/get_coordinates/meta.yml @@ -0,0 +1,81 @@ +name: "get_transcripts_coordinates" +description: Read a transcripts parquet file and emit the transcript bounding-box coordinates to stdout. +keywords: + - xenium + - transcripts + - coordinates + - parquet + - bounding-box +tools: + - "python": + description: | + Python programming language interpreter. 
+ homepage: "https://www.python.org/" + documentation: "https://docs.python.org/3/" + tool_dev_url: "https://github.com/python/cpython" + doi: "no DOI available" + licence: ["PSF-2.0"] + identifier: "" + - "pandas": + description: | + Powerful data structures for data analysis, time series, and statistics. + homepage: "https://pandas.pydata.org/" + documentation: "https://pandas.pydata.org/docs/" + tool_dev_url: "https://github.com/pandas-dev/pandas" + doi: "10.5281/zenodo.3509134" + licence: ["BSD-3-Clause"] + identifier: "" + - "pyarrow": + description: | + Python bindings for Apache Arrow, used to read Parquet files efficiently. + homepage: "https://arrow.apache.org/" + documentation: "https://arrow.apache.org/docs/python/" + tool_dev_url: "https://github.com/apache/arrow" + doi: "no DOI available" + licence: ["Apache-2.0"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - transcripts: + type: file + description: Transcripts parquet file from the Xenium bundle. + pattern: "*.parquet" + ontologies: [] + +output: + stdout: + - stdout: + type: string + description: Comma-separated transcript bounding-box coordinates printed to stdout. 
+ versions_python: + - - ${task.process}: + type: string + description: The process the versions were collected from + - python: + type: string + description: The tool name + - "python3 --version | sed 's/Python //'": + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - python: + type: string + description: The tool name + - "python3 --version | sed 's/Python //'": + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@khersameesh24" +maintainers: + - "@khersameesh24" diff --git a/modules/local/utility/parquet_to_csv/main.nf b/modules/local/utility/parquet_to_csv/main.nf new file mode 100644 index 00000000..865408bc --- /dev/null +++ b/modules/local/utility/parquet_to_csv/main.nf @@ -0,0 +1,46 @@ +process PARQUET_TO_CSV { + tag "$meta.id" + label 'process_low' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/94/9409ce399922a5746bea1b7df5668c3d1d79b9af49a15950d9818c4fe45ac749/data' : + 'community.wave.seqera.io/library/pandas_procs_pyarrow:d8f882b65dfea451' }" + + input: + tuple val(meta), path(transcripts) + val(extension) + + output: + tuple val(meta), path("${prefix}/*.csv*"), emit: transcripts_csv + tuple val("${task.process}"), val('pyarrow'), eval("pip show pyarrow 2>/dev/null | sed -n 's/^Version: //p'"), topic: versions, emit: versions_pyarrow + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "PARQUET_TO_CSV module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + prefix = task.ext.prefix ?: "${meta.id}" + + """ + utility_parquet_to_csv.py \\ + --transcripts ${transcripts} \\ + --extension ${extension} \\ + --prefix ${prefix} + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "PARQUET_TO_CSV module does not support Conda. Please use Docker / Singularity / Podman instead." + } + + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p ${prefix} + touch "${prefix}/${transcripts}.csv" + """ +} diff --git a/modules/local/utility/parquet_to_csv/meta.yml b/modules/local/utility/parquet_to_csv/meta.yml new file mode 100644 index 00000000..60d58fcc --- /dev/null +++ b/modules/local/utility/parquet_to_csv/meta.yml @@ -0,0 +1,42 @@ +name: "parquet_to_csv" +description: Tool suite for spatial omics data conversions. +keywords: + - xenium +tools: + - "parquet_to_csv": + description: "Collects functions to convert data formats for various types of data processing and analysis for spatial omics data." + homepage: "https://github.com/heylf/spatialconverter" + documentation: "https://github.com/heylf/spatialconverter" + tool_dev_url: "https://github.com/heylf/spatialconverter" + doi: "tbd" + licence: [""] + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'segger_run_id' ]` + - transcripts: + type: file + description: | + Transcript file in parquet format. +output: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'segger_run_id' ]` + - transcripts_csv: + type: file + description: | + Transcript file in csv format. 
+ - - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@heylf" +maintainers: + - "@heylf" diff --git a/modules/local/utility/reconstruct_patches/main.nf b/modules/local/utility/reconstruct_patches/main.nf new file mode 100644 index 00000000..c0e87b1b --- /dev/null +++ b/modules/local/utility/reconstruct_patches/main.nf @@ -0,0 +1,68 @@ +/* + * RECONSTRUCT_PATCHES: Reconstruct the patches directory structure from + * individually staged patch files for stitch_transcripts.py. + * + * Inputs: + * meta - sample metadata map + * grid_json - patch_grid.json from XENIUM_PATCH_DIVIDE + * patch_ids - list of patch identifiers (e.g. patch_0000, patch_0001, ...) + * csv_files - per-patch Baysor segmentation.csv files (staged into csv_?/ dirs) + * geojson_files - per-patch Baysor segmentation_polygons.json files (staged into geo_?/ dirs) + * + * Outputs: + * patches_dir - reconstructed patches/ directory containing patch_grid.json plus + * one subdirectory per patch with segmentation.csv and segmentation_polygons.json + * versions - topic-channel version emission for coreutils (cp) + */ +process RECONSTRUCT_PATCHES { + tag "$meta.id" + label 'process_single' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b9/b900c562dadb26dedce5254f88ae85440d7a08cd5e7f72cc4c3ce5aef89b5aa8/data' : + 'community.wave.seqera.io/library/pip_pandas:257725bfe0d2df83' }" + + input: + tuple val(meta), path(grid_json), val(patch_ids), path(csv_files, stageAs: 'csv_?/*'), path(geojson_files, stageAs: 'geo_?/*') + + output: + tuple val(meta), path("patches"), emit: patches_dir + tuple val("${task.process}"), val('coreutils'), eval("cp --version | head -n1 | awk '{print \$NF}'"), topic: versions, emit: versions_coreutils + + when: + task.ext.when == null || task.ext.when + + script: + def ids = patch_ids instanceof List ? patch_ids : [patch_ids] + def csvs = csv_files instanceof List ? csv_files : [csv_files] + def geos = geojson_files instanceof List ? geojson_files : [geojson_files] + + def reconstruct_script = ids.withIndex().collect { pid, idx -> + [ + "mkdir -p patches/${pid}", + "cp '${csvs[idx]}' patches/${pid}/segmentation.csv", + "cp '${geos[idx]}' patches/${pid}/segmentation_polygons.json", + ].join('\n ') + }.join('\n ') + """ + mkdir -p patches + cp '${grid_json}' patches/patch_grid.json + + ${reconstruct_script} + """ + + stub: + def ids = patch_ids instanceof List ? patch_ids : [patch_ids] + def stub_files = ids.collect { pid -> + [ + "mkdir -p patches/${pid}", + "touch patches/${pid}/segmentation.csv", + "touch patches/${pid}/segmentation_polygons.json", + ].join('\n ') + }.join('\n ') + """ + mkdir -p patches + touch patches/patch_grid.json + ${stub_files} + """ +} diff --git a/modules/local/utility/reconstruct_patches/meta.yml b/modules/local/utility/reconstruct_patches/meta.yml new file mode 100644 index 00000000..b218c15a --- /dev/null +++ b/modules/local/utility/reconstruct_patches/meta.yml @@ -0,0 +1,87 @@ +name: "reconstruct_patches" +description: Reconstruct the patches directory structure from individually staged per-patch Baysor segmentation files for downstream stitching. 
+keywords: + - baysor + - xenium + - patches + - stitching + - segmentation +tools: + - "coreutils": + description: | + GNU core utilities (cp, mkdir) used to assemble the patches directory. + homepage: "https://www.gnu.org/software/coreutils/" + documentation: "https://www.gnu.org/software/coreutils/manual/coreutils.html" + tool_dev_url: "https://git.savannah.gnu.org/cgit/coreutils.git" + doi: "no DOI available" + licence: ["GPL-3.0-or-later"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1' ] + - grid_json: + type: file + description: patch_grid.json describing the tile layout from XENIUM_PATCH_DIVIDE. + pattern: "*.json" + ontologies: [] + - patch_ids: + type: list + description: | + Ordered list of patch identifiers + (e.g. patch_0000, patch_0001, ...). + - csv_files: + type: list + description: Per-patch Baysor segmentation.csv files, one per patch_id. + pattern: "*.csv" + ontologies: [] + - geojson_files: + type: list + description: Per-patch Baysor segmentation_polygons.json files, one per patch_id. + pattern: "*.json" + ontologies: [] + +output: + patches_dir: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1' ] + - "patches": + type: directory + description: | + Reconstructed patches directory containing patch_grid.json plus one + subdirectory per patch with segmentation.csv and + segmentation_polygons.json. 
+ pattern: "patches" + versions_coreutils: + - - ${task.process}: + type: string + description: The process the versions were collected from + - coreutils: + type: string + description: The tool name + - "cp --version | head -n1 | awk '{print $NF}'": + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - coreutils: + type: string + description: The tool name + - "cp --version | head -n1 | awk '{print $NF}'": + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@an-altosian" +maintainers: + - "@an-altosian" diff --git a/modules/local/utility/resize_tif/main.nf b/modules/local/utility/resize_tif/main.nf new file mode 100644 index 00000000..00fe2134 --- /dev/null +++ b/modules/local/utility/resize_tif/main.nf @@ -0,0 +1,48 @@ +process RESIZE_TIF { + tag "${meta.id}" + label 'process_low' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/6d/6d5aedb8fcf066eecd9f0dfac93bfffc8161bdae65b4502509d9953db2036a7e/data' : + 'community.wave.seqera.io/library/numpy_pandas_pyarrow_scikit-image_tifffile:131397039376b375' }" + + input: + tuple val(meta), path(transcripts), path(mask), path(metadata) + + output: + tuple val(meta), path("${prefix}/resized_*.tif"), emit: resized_mask + tuple val("${task.process}"), val('python'), eval("python3 --version | sed 's/Python //'"), topic: versions, emit: versions_python + tuple val("${task.process}"), val('tifffile'), eval("pip show tifffile 2>/dev/null | sed -n 's/^Version: //p'"), topic: versions, emit: versions_tifffile + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("RESIZE_TIF module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + prefix = task.ext.prefix ?: "${meta.id}" + + """ + utility_resize_tif.py \\ + --mask ${mask} \\ + --transcripts ${transcripts} \\ + --metadata ${metadata} \\ + --prefix ${prefix} \\ + --mask-filename ${mask} + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("RESIZE_TIF module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p ${prefix} + touch "${prefix}/resized_${mask}.tif" + """ +} diff --git a/modules/local/utility/resize_tif/meta.yml b/modules/local/utility/resize_tif/meta.yml new file mode 100644 index 00000000..482c7fd9 --- /dev/null +++ b/modules/local/utility/resize_tif/meta.yml @@ -0,0 +1,126 @@ +name: "resize_tif" +description: Resize a segmentation mask TIFF to match the full-resolution Xenium morphology image dimensions.
+keywords: + - xenium + - tif + - resize + - mask + - segmentation +tools: + - "python": + description: | + Python programming language interpreter. + homepage: "https://www.python.org/" + documentation: "https://docs.python.org/3/" + tool_dev_url: "https://github.com/python/cpython" + doi: "no DOI available" + licence: ["PSF-2.0"] + identifier: "" + - "tifffile": + description: | + Read and write TIFF files, including BigTIFF and OME-TIFF, in Python. + homepage: "https://github.com/cgohlke/tifffile" + documentation: "https://github.com/cgohlke/tifffile" + tool_dev_url: "https://github.com/cgohlke/tifffile" + doi: "no DOI available" + licence: ["BSD-3-Clause"] + identifier: "" + - "numpy": + description: | + The fundamental package for scientific computing with Python. + homepage: "https://numpy.org/" + documentation: "https://numpy.org/doc/stable/" + tool_dev_url: "https://github.com/numpy/numpy" + doi: "10.1038/s41586-020-2649-2" + licence: ["BSD-3-Clause"] + identifier: "" + - "scikit-image": + description: | + Image processing routines for SciPy. + homepage: "https://scikit-image.org/" + documentation: "https://scikit-image.org/docs/stable/" + tool_dev_url: "https://github.com/scikit-image/scikit-image" + doi: "10.7717/peerj.453" + licence: ["BSD-3-Clause"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - transcripts: + type: file + description: Transcripts parquet file from the Xenium bundle. + pattern: "*.parquet" + ontologies: [] + - mask: + type: file + description: Segmentation mask TIFF to resize. + pattern: "*.{tif,tiff}" + ontologies: [] + - metadata: + type: file + description: Xenium experiment metadata file (used to read full-resolution dimensions). + pattern: "*" + ontologies: [] + +output: + resized_mask: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - "${meta.id}/resized_*.tif": + type: file + description: Segmentation mask TIFF resized to the full-resolution morphology image dimensions. + pattern: "*/resized_*.tif" + ontologies: [] + versions_python: + - - ${task.process}: + type: string + description: The process the versions were collected from + - python: + type: string + description: The tool name + - "python3 --version | sed 's/Python //'": + type: eval + description: The expression to obtain the version of the tool + versions_tifffile: + - - ${task.process}: + type: string + description: The process the versions were collected from + - tifffile: + type: string + description: The tool name + - "pip show tifffile 2>/dev/null | sed -n 's/^Version: //p'": + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - python: + type: string + description: The tool name + - "python3 --version | sed 's/Python //'": + type: eval + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: The process the versions were collected from + - tifffile: + type: string + description: The tool name + - "pip show tifffile 2>/dev/null | sed -n 's/^Version: //p'": + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@khersameesh24" +maintainers: + - "@khersameesh24" diff --git a/modules/local/utility/segger2xr/main.nf b/modules/local/utility/segger2xr/main.nf new file mode 100644 index 00000000..073748d7 --- /dev/null +++ b/modules/local/utility/segger2xr/main.nf @@ -0,0 +1,50 @@ +process SEGGER2XR { + tag "$meta.id" + label 'process_medium' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/cb/cb8fc03fa657c164c5d83f075578bbb5d9c10f1178165f94e94f33c67efca1a1/data' : + 'community.wave.seqera.io/library/spatialdata-io_spatialdata:b264928c30680e87' }" + + input: + tuple val(meta), path(transcripts) + + output: + tuple val(meta), path("${meta.id}/segmentation.csv") , emit: segmentation_csv + tuple val(meta), path("${meta.id}/transcripts.parquet") , emit: transcripts_parquet + tuple val(meta), path("${meta.id}/segmentation_polygons.json") , emit: viz_polygons + tuple val("${task.process}"), val('python'), eval("python3 --version | sed 's/Python //'"), topic: versions, emit: versions_python + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "SEGGER2XR module does not support Conda. Please use Docker / Singularity / Podman instead." + } + + def min_transcripts = task.ext.min_transcripts_per_cell ?: 3 + + """ + utility_segger2xr.py \\ + --transcripts ${transcripts} \\ + --prefix ${meta.id} \\ + --min-transcripts ${min_transcripts} + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "SEGGER2XR module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + + def prefix = "${meta.id}" // outputs are declared under ${meta.id}/, so task.ext.prefix must not redirect the stub + + """ + mkdir -p ${prefix} + echo 'transcript_id,x,y,z,gene,cell,is_noise' > "${prefix}/segmentation.csv" + touch "${prefix}/transcripts.parquet" + echo '{"type":"FeatureCollection","features":[]}' > "${prefix}/segmentation_polygons.json" + """ +} diff --git a/modules/local/utility/segger2xr/meta.yml b/modules/local/utility/segger2xr/meta.yml new file mode 100644 index 00000000..9a7364bf --- /dev/null +++ b/modules/local/utility/segger2xr/meta.yml @@ -0,0 +1,101 @@ +name: "segger2xr" +description: Convert SEGGER segmentation output (transcripts parquet with cell assignments) into XeniumRanger-compatible segmentation files. +keywords: + - xenium + - segger + - xeniumranger + - segmentation + - conversion +tools: + - "python": + description: | + Python programming language interpreter. + homepage: "https://www.python.org/" + documentation: "https://docs.python.org/3/" + tool_dev_url: "https://github.com/python/cpython" + doi: "no DOI available" + licence: ["PSF-2.0"] + identifier: "" + - "segger": + description: | + SEGGER cell segmentation for spatial transcriptomics data. + homepage: "https://github.com/EliHei2/segger_dev" + documentation: "https://github.com/EliHei2/segger_dev" + tool_dev_url: "https://github.com/EliHei2/segger_dev" + doi: "no DOI available" + licence: ["MIT"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - transcripts: + type: file + description: Transcripts parquet file output by SEGGER with per-transcript cell assignments. + pattern: "*.parquet" + ontologies: [] + +output: + segmentation_csv: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "${meta.id}/segmentation.csv": + type: file + description: Per-transcript segmentation table (transcript_id, x, y, z, gene, cell, is_noise).
+ pattern: "*/segmentation.csv" + ontologies: [] + transcripts_parquet: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "${meta.id}/transcripts.parquet": + type: file + description: Transcripts parquet file in XeniumRanger-compatible format. + pattern: "*/transcripts.parquet" + ontologies: [] + viz_polygons: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "${meta.id}/segmentation_polygons.json": + type: file + description: GeoJSON FeatureCollection of cell segmentation polygons for visualization. + pattern: "*/segmentation_polygons.json" + ontologies: [] + versions_python: + - - ${task.process}: + type: string + description: The process the versions were collected from + - python: + type: string + description: The tool name + - "python3 --version | sed 's/Python //'": + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - python: + type: string + description: The tool name + - "python3 --version | sed 's/Python //'": + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@heylf" +maintainers: + - "@heylf" diff --git a/modules/local/utility/split_transcripts/main.nf b/modules/local/utility/split_transcripts/main.nf new file mode 100644 index 00000000..f7057e31 --- /dev/null +++ b/modules/local/utility/split_transcripts/main.nf @@ -0,0 +1,46 @@ +process SPLIT_TRANSCRIPTS { + tag "$meta.id" + label 'process_low' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b9/b900c562dadb26dedce5254f88ae85440d7a08cd5e7f72cc4c3ce5aef89b5aa8/data' : + 'community.wave.seqera.io/library/pip_pandas:257725bfe0d2df83' }" + + input: + tuple val(meta), path(transcripts) + val(x_bins) + val(y_bins) + + output: + tuple val(meta), path("${meta.id}/splits.csv"), emit: splits_csv + tuple val("${task.process}"), val('python'), eval("python3 --version | sed 's/Python //'"), topic: versions, emit: versions_python + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "SPLIT_TRANSCRIPTS module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + utility_split_transcripts.py \\ + --transcripts ${transcripts} \\ + --x-bins ${x_bins} \\ + --y-bins ${y_bins} \\ + --prefix ${prefix} + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "SPLIT_TRANSCRIPTS module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir -p ${prefix} + touch "${prefix}/splits.csv" + """ +} diff --git a/modules/local/utility/split_transcripts/meta.yml b/modules/local/utility/split_transcripts/meta.yml new file mode 100644 index 00000000..8317c052 --- /dev/null +++ b/modules/local/utility/split_transcripts/meta.yml @@ -0,0 +1,54 @@ +name: "split_transcripts" +description: Split transcripts along x & y axes +keywords: + - baysor + - transcripts + - split_transcripts +tools: + - "baysor": + description: "Utility package to split transcripts for Baysor. Baysor is a tool that segments cells using spatial gene expression maps. 
Optionally, segmentation masks can be given as additional input." + homepage: "https://kharchenkolab.github.io/Baysor/dev/" + documentation: "https://kharchenkolab.github.io/Baysor/dev/" + tool_dev_url: "https://github.com/kharchenkolab/Baysor" + doi: "https://doi.org/10.1038/s41587-021-01044-w" + licence: ["MIT license"] + identifier: + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - transcripts: + type: file + description: transcripts.parquet file from the xenium bundle + pattern: "*.parquet" + - x_bins: + type: integer + description: number of slices along the x axis (default - 10) + - y_bins: + type: integer + description: number of slices along the y axis (default - 10) + +output: + - - splits_csv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.csv": + type: file + description: filtered transcripts.parquet + pattern: "splits.csv" + + - - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@khersameesh24" +maintainers: + - "@khersameesh24" diff --git a/modules/local/utility/upscale_mask/main.nf b/modules/local/utility/upscale_mask/main.nf new file mode 100644 index 00000000..2fc896e2 --- /dev/null +++ b/modules/local/utility/upscale_mask/main.nf @@ -0,0 +1,50 @@ +/* + * UPSCALE_MASK: Restore cellpose masks to original image resolution + * + * Uses nearest-neighbor interpolation to upscale segmentation masks + * back to original dimensions (from scale_info.json). 
+ * + * Input: + * - meta: Sample metadata map + * - mask: Cellpose mask TIFF (downscaled resolution) + * - scale_info: JSON with original dimensions + * + * Output: + * - upscaled_mask: Mask at original resolution + * - versions: Software versions + */ +process UPSCALE_MASK { + tag "${meta.id}" + label 'process_medium' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/cb/cb670191b7ae1a9fd5449746453916c7014b9ea622942ca76a7cb40da7deee46/data' : + 'community.wave.seqera.io/library/python_pip_cellpose:fdf7a8c3a305a26e' }" + + input: + tuple val(meta), path(mask), path(scale_info) + + output: + tuple val(meta), path("${prefix}/upscaled_*.tif"), emit: upscaled_mask + tuple val("${task.process}"), val('python'), eval("python3 --version | sed 's/Python //'"), topic: versions, emit: versions_python + tuple val("${task.process}"), val('tifffile'), eval("pip show tifffile 2>/dev/null | sed -n 's/^Version: //p'"), topic: versions, emit: versions_tifffile + + when: + task.ext.when == null || task.ext.when + + script: + prefix = task.ext.prefix ?: "${meta.id}" + """ + utility_upscale_mask.py \\ + --mask ${mask} \\ + --scale-info ${scale_info} \\ + --prefix ${prefix} + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir -p ${prefix} + touch ${prefix}/upscaled_mask.tif + """ +} diff --git a/modules/local/utility/upscale_mask/meta.yml b/modules/local/utility/upscale_mask/meta.yml new file mode 100644 index 00000000..92c85e12 --- /dev/null +++ b/modules/local/utility/upscale_mask/meta.yml @@ -0,0 +1,103 @@ +name: "upscale_mask" +description: Restore segmentation masks to the original image resolution using nearest-neighbor interpolation. +keywords: + - xenium + - mask + - upscale + - segmentation + - cellpose +tools: + - "python": + description: | + Python programming language interpreter. 
+ homepage: "https://www.python.org/" + documentation: "https://docs.python.org/3/" + tool_dev_url: "https://github.com/python/cpython" + doi: "no DOI available" + licence: ["PSF-2.0"] + identifier: "" + - "tifffile": + description: | + Read and write TIFF files, including BigTIFF and OME-TIFF, in Python. + homepage: "https://github.com/cgohlke/tifffile" + documentation: "https://github.com/cgohlke/tifffile" + tool_dev_url: "https://github.com/cgohlke/tifffile" + doi: "no DOI available" + licence: ["BSD-3-Clause"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - mask: + type: file + description: Cellpose segmentation mask TIFF at downscaled resolution. + pattern: "*.tif" + ontologies: [] + - scale_info: + type: file + description: JSON file containing original image dimensions (orig_h, orig_w). + pattern: "*.json" + ontologies: [] + +output: + upscaled_mask: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*/upscaled_*.tif": + type: file + description: Segmentation mask upscaled to the original image resolution. 
+ pattern: "*/upscaled_*.tif" + ontologies: [] + versions_python: + - - ${task.process}: + type: string + description: The process the versions were collected from + - python: + type: string + description: The tool name + - "python3 --version | sed 's/Python //'": + type: eval + description: The expression to obtain the version of the tool + versions_tifffile: + - - ${task.process}: + type: string + description: The process the versions were collected from + - tifffile: + type: string + description: The tool name + - 'python3 -c "import tifffile; print(tifffile.__version__)"': + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - python: + type: string + description: The tool name + - "python3 --version | sed 's/Python //'": + type: eval + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: The process the versions were collected from + - tifffile: + type: string + description: The tool name + - 'python3 -c "import tifffile; print(tifffile.__version__)"': + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@an-altosian" +maintainers: + - "@an-altosian" diff --git a/modules/local/xenium_patch/divide/main.nf b/modules/local/xenium_patch/divide/main.nf new file mode 100644 index 00000000..957b1624 --- /dev/null +++ b/modules/local/xenium_patch/divide/main.nf @@ -0,0 +1,57 @@ +/* + * XENIUM_PATCH_DIVIDE: Split transcripts.parquet into overlapping patches. 
+ * + * Input: + * - meta: Sample metadata map + * - transcripts: transcripts.parquet file + * - image: morphology image (for getting dimensions) + * + * Output: + * - grid: patch_grid.json metadata file + * - patch_transcripts: per-patch transcripts.parquet files (one per patch) + * - versions: Software versions + */ +process XENIUM_PATCH_DIVIDE { + tag "$meta.id" + label 'process_medium' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f9/f9c8f3a2de4e2aa94500011f7d7d09276e9b6f2d79ee8737c9098fe22d4649bc/data' : + 'community.wave.seqera.io/library/sopa_procps-ng_pyarrow:c9ce8cd2ede79d72' }" + + input: + tuple val(meta), path(transcripts), path(image) + + output: + tuple val(meta), path("patches/patch_grid.json") , emit: grid + tuple val(meta), path("patches/patch_*/transcripts.parquet") , emit: patch_transcripts + tuple val("${task.process}"), val('python'), eval("python3 --version | sed 's/Python //'"), topic: versions, emit: versions_python + tuple val("${task.process}"), val('pyarrow'), eval("pip show pyarrow 2>/dev/null | sed -n 's/^Version: //p'"), topic: versions, emit: versions_pyarrow + + when: + task.ext.when == null || task.ext.when + + script: + def tile_width = task.ext.tile_width ?: 2000 + def overlap = task.ext.overlap ?: 50 + def balanced = task.ext.balanced + def balanced_flag = balanced == true || balanced == 'true' ? 
'--balanced' : '' + """ + divide_transcripts.py \\ + --transcripts ${transcripts} \\ + --output patches \\ + --tile-width ${tile_width} \\ + --overlap ${overlap} \\ + ${balanced_flag} \\ + --image-width \$(python3 -c "import tifffile; print(tifffile.imread('${image}').shape[-1])") \\ + --image-height \$(python3 -c "import tifffile; print(tifffile.imread('${image}').shape[-2])") + + """ + + stub: + """ + mkdir -p patches/patch_0_0 + touch patches/patch_0_0/transcripts.parquet + echo '{}' > patches/patch_grid.json + """ +} diff --git a/modules/local/xenium_patch/divide/meta.yml b/modules/local/xenium_patch/divide/meta.yml new file mode 100644 index 00000000..28983c90 --- /dev/null +++ b/modules/local/xenium_patch/divide/meta.yml @@ -0,0 +1,115 @@ +name: "xenium_patch_divide" +description: Split transcripts.parquet into overlapping spatial patches for tiled segmentation processing. +keywords: + - xenium + - patches + - tiling + - transcripts + - parquet +tools: + - "python": + description: | + Python programming language interpreter. + homepage: "https://www.python.org/" + documentation: "https://docs.python.org/3/" + tool_dev_url: "https://github.com/python/cpython" + doi: "no DOI available" + licence: ["PSF-2.0"] + identifier: "" + - "pyarrow": + description: | + Python bindings for Apache Arrow, used to read and write Parquet + files efficiently. + homepage: "https://arrow.apache.org/docs/python/" + documentation: "https://arrow.apache.org/docs/python/" + tool_dev_url: "https://github.com/apache/arrow" + doi: "no DOI available" + licence: ["Apache-2.0"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - transcripts: + type: file + description: transcripts.parquet file from the Xenium bundle. + pattern: "*.parquet" + ontologies: [] + - image: + type: file + description: Morphology image (used to read full-resolution dimensions). 
+ pattern: "*.{ome.tif,ome.tiff,tif,tiff}" + ontologies: [] + +output: + grid: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "patches/patch_grid.json": + type: file + description: JSON file describing the patch grid (offsets, sizes, overlaps). + pattern: "patches/patch_grid.json" + ontologies: [] + patch_transcripts: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "patches/patch_*/transcripts.parquet": + type: file + description: Per-patch transcripts.parquet files (one per patch). + pattern: "patches/patch_*/transcripts.parquet" + ontologies: [] + versions_python: + - - ${task.process}: + type: string + description: The process the versions were collected from + - python: + type: string + description: The tool name + - "python3 --version | sed 's/Python //'": + type: eval + description: The expression to obtain the version of the tool + versions_pyarrow: + - - ${task.process}: + type: string + description: The process the versions were collected from + - pyarrow: + type: string + description: The tool name + - 'python3 -c "import pyarrow; print(pyarrow.__version__)"': + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - python: + type: string + description: The tool name + - "python3 --version | sed 's/Python //'": + type: eval + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: The process the versions were collected from + - pyarrow: + type: string + description: The tool name + - 'python3 -c "import pyarrow; print(pyarrow.__version__)"': + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@an-altosian" +maintainers: + - "@an-altosian" diff --git a/modules/local/xenium_patch/stitch/main.nf 
b/modules/local/xenium_patch/stitch/main.nf new file mode 100644 index 00000000..83d7fed5 --- /dev/null +++ b/modules/local/xenium_patch/stitch/main.nf @@ -0,0 +1,56 @@ +/* + * XENIUM_PATCH_STITCH: Stitch per-patch segmentation results into unified output. + * + * Uses sopa's solve_conflicts() to resolve overlapping cells at patch boundaries. + * + * Input: + * - meta: Sample metadata map + * - patches: Directory containing patch subdirectories and patch_grid.json + * + * Output: + * - xr_polygons_transcript: Stitched cell polygons and transcript metadata + * - versions: Software versions + */ +process XENIUM_PATCH_STITCH { + tag "$meta.id" + label 'process_medium' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f9/f9c8f3a2de4e2aa94500011f7d7d09276e9b6f2d79ee8737c9098fe22d4649bc/data' : + 'community.wave.seqera.io/library/sopa_procps-ng_pyarrow:c9ce8cd2ede79d72' }" + + input: + tuple val(meta), path(patches) + + output: + tuple val(meta), + path("output/xr-cell-polygons.geojson"), + path("output/xr-transcript-metadata.csv") , emit: xr_polygons_transcript + tuple val("${task.process}"), val('python'), eval("python3 --version | sed 's/Python //'"), topic: versions, emit: versions_python + tuple val("${task.process}"), val('sopa'), eval("pip show sopa 2>/dev/null | sed -n 's/^Version: //p'"), topic: versions, emit: versions_sopa + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + xenium_patch_stitch_transcripts.py \\ + --patches ${patches} \\ + --output output \\ + ${args} + + # Post-process: ensure all GeoJSON geometries are Polygon and + # reconcile dropped cells in the transcript CSV. 
+ xenium_patch_stitch_postprocess.py \\ + --geojson output/xr-cell-polygons.geojson \\ + --csv output/xr-transcript-metadata.csv + """ + + stub: + """ + mkdir -p output + echo '{"type":"FeatureCollection","features":[]}' > output/xr-cell-polygons.geojson + echo 'transcript_id,x,y,z,gene,cell,is_noise' > output/xr-transcript-metadata.csv + """ +} diff --git a/modules/local/xenium_patch/stitch/meta.yml b/modules/local/xenium_patch/stitch/meta.yml new file mode 100644 index 00000000..958ef907 --- /dev/null +++ b/modules/local/xenium_patch/stitch/meta.yml @@ -0,0 +1,104 @@ +name: "xenium_patch_stitch" +description: Stitch per-patch segmentation results into a unified output using sopa's solve_conflicts() to resolve overlapping cells at patch boundaries. +keywords: + - xenium + - patches + - stitching + - sopa + - segmentation +tools: + - "python": + description: | + Python programming language interpreter. + homepage: "https://www.python.org/" + documentation: "https://docs.python.org/3/" + tool_dev_url: "https://github.com/python/cpython" + doi: "no DOI available" + licence: ["PSF-2.0"] + identifier: "" + - "sopa": + description: | + Spatial-omics pipeline that handles tile-based segmentation, conflict + resolution, and aggregation across multiple platforms (Xenium, + MERSCOPE, CosMx, etc.). + homepage: "https://gustaveroussy.github.io/sopa/" + documentation: "https://gustaveroussy.github.io/sopa/" + tool_dev_url: "https://github.com/gustaveroussy/sopa" + doi: "10.1038/s41467-024-48981-z" + licence: ["BSD-3-Clause"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - patches: + type: directory + description: Directory containing per-patch subdirectories and a patch_grid.json file. + pattern: "patches" + +output: + xr_polygons_transcript: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - "output/xr-cell-polygons.geojson": + type: file + description: Stitched cell polygons in GeoJSON format compatible with XeniumRanger. + pattern: "output/xr-cell-polygons.geojson" + ontologies: [] + - "output/xr-transcript-metadata.csv": + type: file + description: Per-transcript cell assignments and noise flags after stitching. + pattern: "output/xr-transcript-metadata.csv" + ontologies: [] + versions_python: + - - ${task.process}: + type: string + description: The process the versions were collected from + - python: + type: string + description: The tool name + - "python3 --version | sed 's/Python //'": + type: eval + description: The expression to obtain the version of the tool + versions_sopa: + - - ${task.process}: + type: string + description: The process the versions were collected from + - sopa: + type: string + description: The tool name + - 'python3 -c "import sopa; print(sopa.__version__)"': + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - python: + type: string + description: The tool name + - "python3 --version | sed 's/Python //'": + type: eval + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: The process the versions were collected from + - sopa: + type: string + description: The tool name + - 'python3 -c "import sopa; print(sopa.__version__)"': + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@an-altosian" +maintainers: + - "@an-altosian" diff --git a/modules/nf-core/cellpose/Dockerfile b/modules/nf-core/cellpose/Dockerfile new file mode 100644 index 00000000..34053226 --- /dev/null +++ b/modules/nf-core/cellpose/Dockerfile @@ -0,0 +1,25 @@ +# GPU image for cellpose segmentation +# Base: PyTorch with CUDA 12.4 (consistent CUDA support) +FROM 
pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime + +# Install system dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + procps \ + libgl1 \ + libglib2.0-0 \ + && rm -rf /var/lib/apt/lists/* + +# Upgrade pip +RUN pip install --no-cache-dir --upgrade pip + +# Install cellpose and its dependencies +RUN pip install --no-cache-dir \ + cellpose==4.0.8 \ + numpy \ + scipy \ + matplotlib \ + scikit-image \ + opencv-python-headless + +# Set default shell +CMD ["/bin/bash"] diff --git a/modules/nf-core/cellpose/cellpose.diff b/modules/nf-core/cellpose/cellpose.diff new file mode 100644 index 00000000..f04e6f11 --- /dev/null +++ b/modules/nf-core/cellpose/cellpose.diff @@ -0,0 +1,32 @@ +Changes in component 'nf-core/cellpose' +'modules/nf-core/cellpose/environment.yml' is unchanged +'modules/nf-core/cellpose/meta.yml' is unchanged +'modules/nf-core/cellpose/tests/main.nf.test' is unchanged +'modules/nf-core/cellpose/tests/main.nf.test.snap' is unchanged +'modules/nf-core/cellpose/tests/nextflow_wflows.config' is unchanged +Changes in 'cellpose/main.nf': +--- modules/nf-core/cellpose/main.nf ++++ modules/nf-core/cellpose/main.nf +@@ -1,7 +1,7 @@ + process CELLPOSE { + tag "${meta.id}" +- label 'process_medium' +- label 'process_gpu' ++ label 'process_high' ++ label 'process_gpu_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+@@ -45,6 +45,11 @@ + ${model_command} \\ + ${args} + ++ # Fail fast if cellpose detected zero cells ++ if grep -q "No cell pixels found" .cellpose/run.log 2>/dev/null; then ++ echo "ERROR: cellpose detected 0 cells" >&2; exit 1 ++ fi ++ + mkdir -p ${prefix} + mv *masks.tif ${prefix}/ + mv *flows.tif ${prefix}/ 2>/dev/null || true +************************************************************ diff --git a/modules/nf-core/cellpose/environment.yml b/modules/nf-core/cellpose/environment.yml new file mode 100644 index 00000000..e22d2601 --- /dev/null +++ b/modules/nf-core/cellpose/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::python=3.12.12 + - pip: + - cellpose==4.0.9 diff --git a/modules/nf-core/cellpose/main.nf b/modules/nf-core/cellpose/main.nf new file mode 100644 index 00000000..57fda819 --- /dev/null +++ b/modules/nf-core/cellpose/main.nf @@ -0,0 +1,68 @@ +process CELLPOSE { + tag "${meta.id}" + label 'process_high' + label 'process_gpu_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/cb/cb670191b7ae1a9fd5449746453916c7014b9ea622942ca76a7cb40da7deee46/data' : + 'community.wave.seqera.io/library/python_pip_cellpose:fdf7a8c3a305a26e' }" + + input: + tuple val(meta), path(image) + path(model) + + output: + tuple val(meta), path("${prefix}/*masks.tif"), emit: mask + tuple val(meta), path("${prefix}/*flows.tif"), emit: flows, optional: true + tuple val(meta), path("${prefix}/*seg.npy"), emit: cells, optional: true + tuple val("${task.process}"), val('cellpose'), eval("cellpose --version | sed -n 's/cellpose version:[[:space:]]*//p' | tr -d '[:space:]'"), topic: versions, emit: versions_cellpose + tuple val("${task.process}"), val('python'), eval("cellpose --version | sed -n 's/python version:[[:space:]]*//p' | tr -d '[:space:]'"), topic: versions, emit: versions_python + tuple val("${task.process}"), val('torch'), eval("cellpose --version | sed -n 's/torch version:[[:space:]]*//p' | tr -d '[:space:]'"), topic: versions, emit: versions_torch + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def model_command = model ? "--pretrained_model ${model}" : "" + def gpu_flag = task.accelerator ? 
"--use_gpu" : "" + prefix = task.ext.prefix ?: "${meta.id}" + """ + export OMP_NUM_THREADS=${task.cpus} + export MKL_NUM_THREADS=${task.cpus} + # Container runs as root with HOME=/ which is not writable + export HOME=\$PWD + export MPLCONFIGDIR=\$PWD/.matplotlib + export CELLPOSE_LOCAL_MODELS_PATH=\$PWD/.cellpose + mkdir -p \$MPLCONFIGDIR \$CELLPOSE_LOCAL_MODELS_PATH + + cellpose \\ + --image_path ${image} \\ + --save_tif \\ + --verbose \\ + ${gpu_flag} \\ + ${model_command} \\ + ${args} + + # Fail fast if cellpose detected zero cells + if grep -q "No cell pixels found" .cellpose/run.log 2>/dev/null; then + echo "ERROR: cellpose detected 0 cells" >&2; exit 1 + fi + + mkdir -p ${prefix} + mv *masks.tif ${prefix}/ + mv *flows.tif ${prefix}/ 2>/dev/null || true + mv *seg.npy ${prefix}/ 2>/dev/null || true + """ + + stub: + def name = image.name + def base = name.lastIndexOf('.') != -1 ? name[0..name.lastIndexOf('.') - 1] : name + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p ${prefix} + touch ${prefix}/${base}_cp_masks.tif + """ +} diff --git a/modules/nf-core/cellpose/meta.yml b/modules/nf-core/cellpose/meta.yml new file mode 100644 index 00000000..cb7a89a7 --- /dev/null +++ b/modules/nf-core/cellpose/meta.yml @@ -0,0 +1,158 @@ +name: "cellpose" +description: cellpose segments cells in images using GPU-accelerated deep learning +keywords: + - segmentation + - image + - cellpose + - gpu + - spatial-transcriptomics +tools: + - "cellpose": + description: "cellpose is an anatomical segmentation algorithm written in Python + 3 by Carsen Stringer and Marius Pachitariu" + homepage: "https://github.com/MouseLand/cellpose" + documentation: "https://cellpose.readthedocs.io/en/latest/command.html" + tool_dev_url: "https://github.com/MouseLand/cellpose" + doi: 10.1038/s41592-022-01663-4 + licence: ["BSD 3-Clause"] + identifier: biotools:cellpose +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + (sample id) + - image: + 
type: file + description: tif file ready for segmentation + pattern: "*.{tif,tiff}" + ontologies: [] + - model: + type: file + description: | + Optional custom cellpose model file. When provided, passed as + --pretrained_model to cellpose. Pass [] (empty list) to use the + default model (cpsam in cellpose 4). + pattern: "*" + ontologies: [] +output: + mask: + - - meta: + type: map + description: | + Groovy Map containing sample information + [sample id] + - "${prefix}/*masks.tif": + type: file + description: labelled mask output from cellpose in tif format + pattern: "${prefix}/*masks.tif" + ontologies: [] + flows: + - - meta: + type: map + description: | + Groovy Map containing sample information + [sample id] + - "${prefix}/*flows.tif": + type: file + description: cell flow output from cellpose + pattern: "${prefix}/*flows.tif" + ontologies: [] + cells: + - - meta: + type: map + description: | + Groovy Map containing sample information + [sample id] + - "${prefix}/*seg.npy": + type: file + description: numpy array with cell segmentation data + pattern: "${prefix}/*seg.npy" + ontologies: [] + versions_cellpose: + - - ${task.process}: + type: string + description: The name of the process + - cellpose: + type: string + description: The name of the tool + - "cellpose --version | sed -n 's/cellpose version:[[:space:]]*//p' | tr -d '[:space:]'": + type: eval + description: The expression to obtain the version of the tool + versions_python: + - - ${task.process}: + type: string + description: The name of the process + - python: + type: string + description: The name of the tool + - "cellpose --version | sed -n 's/python version:[[:space:]]*//p' | tr -d '[:space:]'": + type: eval + description: The expression to obtain the version of the tool + versions_torch: + - - ${task.process}: + type: string + description: The name of the process + - torch: + type: string + description: The name of the tool + - "cellpose --version | sed -n 's/torch version:[[:space:]]*//p' | tr -d 
'[:space:]'": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - cellpose: + type: string + description: The name of the tool + - "cellpose --version | sed -n 's/cellpose version:[[:space:]]*//p' | tr -d '[:space:]'": + type: eval + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: The name of the process + - python: + type: string + description: The name of the tool + - "cellpose --version | sed -n 's/python version:[[:space:]]*//p' | tr -d '[:space:]'": + type: eval + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: The name of the process + - torch: + type: string + description: The name of the tool + - "cellpose --version | sed -n 's/torch version:[[:space:]]*//p' | tr -d '[:space:]'": + type: eval + description: The expression to obtain the version of the tool +notes: | + When `accelerator` is set (e.g. `accelerator = 1`), the module + automatically passes `--use_gpu` to cellpose. The container (built via Seqera + Containers) includes PyTorch 2.10.0 with CUDA 12.8 and falls back to CPU + automatically when no GPU is available. Use the `process_gpu` label to request + GPU resources from your executor. When running with conda/mamba, GPU support + depends on having a CUDA-enabled PyTorch installation in your environment. + + Model selection via the model input channel: + - Custom model file: file("/path/to/model") + - Default (cpsam): [] + + Additional cellpose CLI arguments can be passed via `task.ext.args`: + ext.args = '--diameter 30 --flow_threshold 0.4 --cellprob_threshold 0' + + Deprecated in cellpose 4.0.1+: `--chan`, `--chan2`, `--invert`, `--all_channels`, + `--diam_mean`, `--pretrained_model_ortho`. Do not pass these via ext.args. + + Model weights are not bundled in the container. 
Cellpose downloads them on first + use to `$CELLPOSE_LOCAL_MODELS_PATH` (set to the work directory). +authors: + - "@josenimo" + - "@FloWuenne" + - "@dongzehe" +maintainers: + - "@josenimo" + - "@FloWuenne" + - "@kbestak" diff --git a/modules/nf-core/cellpose/tests/main.nf.test b/modules/nf-core/cellpose/tests/main.nf.test new file mode 100644 index 00000000..6a7688ba --- /dev/null +++ b/modules/nf-core/cellpose/tests/main.nf.test @@ -0,0 +1,63 @@ +nextflow_process { + + name "Test Process CELLPOSE" + script "../main.nf" + process "CELLPOSE" + + tag "modules" + tag "modules_nfcore" + tag "cellpose" + + test("cellpose - with flows, no model") { + + when { + config "./nextflow_wflows.config" + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'imaging/segmentation/cycif_tonsil_registered.ome.tif', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.mask).match("mask") }, + { assert snapshot(process.out.flows).match("flows") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + + test("cellpose - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'imaging/segmentation/cycif_tonsil_registered.ome.tif', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/cellpose/tests/main.nf.test.snap b/modules/nf-core/cellpose/tests/main.nf.test.snap new file mode 100644 index 00000000..e76ca251 --- /dev/null +++ b/modules/nf-core/cellpose/tests/main.nf.test.snap @@ -0,0 +1,82 @@ +{ + "flows": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-07-04T13:21:35.054592365" + }, + "versions": { + "content": [ + [ + 
"versions.yml:md5,398393e73a80fc622873765256d5ec79" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-07-04T13:21:35.145644688" + }, + "cellpose - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "cycif_tonsil_registered.ome_cp_masks.tif:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,398393e73a80fc622873765256d5ec79" + ], + "flows": [ + + ], + "mask": [ + [ + { + "id": "test" + }, + "cycif_tonsil_registered.ome_cp_masks.tif:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,398393e73a80fc622873765256d5ec79" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-07-04T13:21:52.227498747" + }, + "mask": { + "content": [ + [ + [ + { + "id": "test" + }, + "cycif_tonsil_registered.ome_cp_masks.tif:md5,b151d6718fcb770b2fa3989da632d96e" + ] + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-07-04T13:21:34.95450341" + } +} \ No newline at end of file diff --git a/modules/nf-core/cellpose/tests/nextflow_wflows.config b/modules/nf-core/cellpose/tests/nextflow_wflows.config new file mode 100644 index 00000000..773c97bd --- /dev/null +++ b/modules/nf-core/cellpose/tests/nextflow_wflows.config @@ -0,0 +1,5 @@ +process { + withName: "CELLPOSE" { + ext.args = '--pretrained_model nuclei --diameter 9 --channel_axis 0 --no_npy --save_flows' + } +} diff --git a/modules/nf-core/fastqc/.conda-lock/linux_amd64-bd-5cb1a2fa2f18c7c2_1.txt b/modules/nf-core/fastqc/.conda-lock/linux_amd64-bd-5cb1a2fa2f18c7c2_1.txt deleted file mode 100644 index 7770ccd5..00000000 --- a/modules/nf-core/fastqc/.conda-lock/linux_amd64-bd-5cb1a2fa2f18c7c2_1.txt +++ /dev/null @@ -1,822 +0,0 @@ - -version: 6 -environments: -default: -channels: -- url: https://conda.anaconda.org/conda-forge/ -- url: https://conda.anaconda.org/bioconda/ -- url: https://conda.anaconda.org/bioconda/ -options: 
-pypi-prerelease-mode: if-necessary-or-explicit -packages: -linux-64: -- conda: https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-20_gnu.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.15.3-hb03c661_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hda65f42_9.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2026.2.25-hbd8a1cb_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.4-he90730b_1.conda -- conda: https://conda.anaconda.org/bioconda/noarch/fastqc-0.12.1-hdfd78af_0.tar.bz2 -- conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2 -- conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2 -- conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2 -- conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_3.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.17.1-h27c8c51_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2 -- conda: https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-hc364b38_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/giflib-5.2.2-hd590300_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.14-hecca717_2.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-13.2.1-h6083320_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/icu-78.3-h33c6efd_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.3-hb9d3cd8_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/krb5-1.22.2-ha1258a1_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.18-h0c24ade_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/lerc-4.1.0-hdb68285_0.conda 
-- conda: https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h7a8fb5f_6.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.25-h17f619e_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20250104-pl5321h7949ede_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.4-hecca717_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libffi-3.5.2-h3435931_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libfreetype-2.14.3-ha770c72_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libfreetype6-2.14.3-h73754d4_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-15.2.0-he0feb66_18.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-15.2.0-h69a702a_18.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libglib-2.86.4-h6548e54_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libgomp-15.2.0-he0feb66_18.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.18-h3b78370_2.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.1.2-hb03c661_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.2-hb03c661_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.55-h421ea60_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-15.2.0-h934c35e_18.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.7.1-h9d88235_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.41.3-h5347b49_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.6.0-hd42ef1d_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.17.0-h8a09558_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda -- conda: 
https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.2-h25fd6f3_2.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h2d0b736_3.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/openjdk-25.0.2-ha668962_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/openssl-3.6.1-h35e630c_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.47-haa7fec5_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/perl-5.32.1-7_hd590300_perl5.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/pixman-0.46.4-h54a6638_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/procps-ng-4.0.6-h18c060e_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-hb9d3cd8_1002.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.2-hb9d3cd8_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.6-he73a12e_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.13-he1eb515_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.12-hb03c661_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.5-hb03c661_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.7-hb03c661_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxfixes-6.0.2-hb03c661_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxi-1.8.2-hb9d3cd8_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrandr-1.5.5-hb03c661_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.12-hb9d3cd8_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxt-1.3.1-hb9d3cd8_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxtst-1.2.5-hb9d3cd8_3.conda -- conda: 
https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb78ec9c_6.conda -packages: -- conda: https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-20_gnu.conda -build_number: 20 -sha256: 1dd3fffd892081df9726d7eb7e0dea6198962ba775bd88842135a4ddb4deb3c9 -md5: a9f577daf3de00bca7c3c76c0ecbd1de -depends: -- __glibc >=2.17,<3.0.a0 -- libgomp >=7.5.0 -constrains: -- openmp_impl <0.0a0 -license: BSD-3-Clause -license_family: BSD -size: 28948 -timestamp: 1770939786096 -- conda: https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.15.3-hb03c661_0.conda -sha256: d88aa7ae766cf584e180996e92fef2aa7d8e0a0a5ab1d4d49c32390c1b5fff31 -md5: dcdc58c15961dbf17a0621312b01f5cb -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -license: LGPL-2.1-or-later -license_family: GPL -size: 584660 -timestamp: 1768327524772 -- conda: https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hda65f42_9.conda -sha256: 0b75d45f0bba3e95dc693336fa51f40ea28c980131fec438afb7ce6118ed05f6 -md5: d2ffd7602c02f2b316fd921d39876885 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -license: bzip2-1.0.6 -license_family: BSD -size: 260182 -timestamp: 1771350215188 -- conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2026.2.25-hbd8a1cb_0.conda -sha256: 67cc7101b36421c5913a1687ef1b99f85b5d6868da3abbf6ec1a4181e79782fc -md5: 4492fd26db29495f0ba23f146cd5638d -depends: -- __unix -license: ISC -size: 147413 -timestamp: 1772006283803 -- conda: https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.4-he90730b_1.conda -sha256: 06525fa0c4e4f56e771a3b986d0fdf0f0fc5a3270830ee47e127a5105bde1b9a -md5: bb6c4808bfa69d6f7f6b07e5846ced37 -depends: -- __glibc >=2.17,<3.0.a0 -- fontconfig >=2.15.0,<3.0a0 -- fonts-conda-ecosystem -- icu >=78.1,<79.0a0 -- libexpat >=2.7.3,<3.0a0 -- libfreetype >=2.14.1 -- libfreetype6 >=2.14.1 -- libgcc >=14 -- libglib >=2.86.3,<3.0a0 -- libpng >=1.6.53,<1.7.0a0 -- libstdcxx >=14 -- libxcb >=1.17.0,<2.0a0 -- libzlib >=1.3.1,<2.0a0 -- pixman 
>=0.46.4,<1.0a0 -- xorg-libice >=1.1.2,<2.0a0 -- xorg-libsm >=1.2.6,<2.0a0 -- xorg-libx11 >=1.8.12,<2.0a0 -- xorg-libxext >=1.3.6,<2.0a0 -- xorg-libxrender >=0.9.12,<0.10.0a0 -license: LGPL-2.1-only or MPL-1.1 -size: 989514 -timestamp: 1766415934926 -- conda: https://conda.anaconda.org/bioconda/noarch/fastqc-0.12.1-hdfd78af_0.tar.bz2 -sha256: 7cc26225d590540ae95cd24940ff42f2da7479dd4cd22ae9ab9298665d06790c -md5: c9f6a4b12229f7331f79c9a00dd6e240 -depends: -- font-ttf-dejavu-sans-mono -- fontconfig -- openjdk >=8.0.144 -- perl -license: GPL >=3 -size: 11664291 -timestamp: 1677946722445 -- conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2 -sha256: 58d7f40d2940dd0a8aa28651239adbf5613254df0f75789919c4e6762054403b -md5: 0c96522c6bdaed4b1566d11387caaf45 -license: BSD-3-Clause -license_family: BSD -size: 397370 -timestamp: 1566932522327 -- conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2 -sha256: c52a29fdac682c20d252facc50f01e7c2e7ceac52aa9817aaf0bb83f7559ec5c -md5: 34893075a5c9e55cdafac56607368fc6 -license: OFL-1.1 -license_family: Other -size: 96530 -timestamp: 1620479909603 -- conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2 -sha256: 00925c8c055a2275614b4d983e1df637245e19058d79fc7dd1a93b8d9fb4b139 -md5: 4d59c254e01d9cde7957100457e2d5fb -license: OFL-1.1 -license_family: Other -size: 700814 -timestamp: 1620479612257 -- conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_3.conda -sha256: 2821ec1dc454bd8b9a31d0ed22a7ce22422c0aef163c59f49dfdf915d0f0ca14 -md5: 49023d73832ef61042f6a237cb2687e7 -license: LicenseRef-Ubuntu-Font-Licence-Version-1.0 -license_family: Other -size: 1620504 -timestamp: 1727511233259 -- conda: https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.17.1-h27c8c51_0.conda -sha256: aa4a44dba97151221100a637c7f4bde619567afade9c0265f8e1c8eed8d7bd8c -md5: 
867127763fbe935bab59815b6e0b7b5c -depends: -- __glibc >=2.17,<3.0.a0 -- libexpat >=2.7.4,<3.0a0 -- libfreetype >=2.14.1 -- libfreetype6 >=2.14.1 -- libgcc >=14 -- libuuid >=2.41.3,<3.0a0 -- libzlib >=1.3.1,<2.0a0 -license: MIT -license_family: MIT -size: 270705 -timestamp: 1771382710863 -- conda: https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2 -sha256: a997f2f1921bb9c9d76e6fa2f6b408b7fa549edd349a77639c9fe7a23ea93e61 -md5: fee5683a3f04bd15cbd8318b096a27ab -depends: -- fonts-conda-forge -license: BSD-3-Clause -license_family: BSD -size: 3667 -timestamp: 1566974674465 -- conda: https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-hc364b38_1.conda -sha256: 54eea8469786bc2291cc40bca5f46438d3e062a399e8f53f013b6a9f50e98333 -md5: a7970cd949a077b7cb9696379d338681 -depends: -- font-ttf-ubuntu -- font-ttf-inconsolata -- font-ttf-dejavu-sans-mono -- font-ttf-source-code-pro -license: BSD-3-Clause -license_family: BSD -size: 4059 -timestamp: 1762351264405 -- conda: https://conda.anaconda.org/conda-forge/linux-64/giflib-5.2.2-hd590300_0.conda -sha256: aac402a8298f0c0cc528664249170372ef6b37ac39fdc92b40601a6aed1e32ff -md5: 3bf7b9fd5a7136126e0234db4b87c8b6 -depends: -- libgcc-ng >=12 -license: MIT -license_family: MIT -size: 77248 -timestamp: 1712692454246 -- conda: https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.14-hecca717_2.conda -sha256: 25ba37da5c39697a77fce2c9a15e48cf0a84f1464ad2aafbe53d8357a9f6cc8c -md5: 2cd94587f3a401ae05e03a6caf09539d -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -- libstdcxx >=14 -license: LGPL-2.0-or-later -license_family: LGPL -size: 99596 -timestamp: 1755102025473 -- conda: https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-13.2.1-h6083320_0.conda -sha256: 477f2c553f72165020d3c56740ba354be916c2f0b76fd9f535e83d698277d5ec -md5: 14470902326beee192e33719a2e8bb7f -depends: -- __glibc >=2.17,<3.0.a0 -- cairo >=1.18.4,<2.0a0 -- graphite2 >=1.3.14,<2.0a0 -- icu >=78.3,<79.0a0 -- libexpat 
>=2.7.4,<3.0a0 -- libfreetype >=2.14.2 -- libfreetype6 >=2.14.2 -- libgcc >=14 -- libglib >=2.86.4,<3.0a0 -- libstdcxx >=14 -- libzlib >=1.3.2,<2.0a0 -license: MIT -license_family: MIT -size: 2384060 -timestamp: 1774276284520 -- conda: https://conda.anaconda.org/conda-forge/linux-64/icu-78.3-h33c6efd_0.conda -sha256: fbf86c4a59c2ed05bbffb2ba25c7ed94f6185ec30ecb691615d42342baa1a16a -md5: c80d8a3b84358cb967fa81e7075fbc8a -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -- libstdcxx >=14 -license: MIT -license_family: MIT -size: 12723451 -timestamp: 1773822285671 -- conda: https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.3-hb9d3cd8_0.conda -sha256: 0960d06048a7185d3542d850986d807c6e37ca2e644342dd0c72feefcf26c2a4 -md5: b38117a3c920364aff79f870c984b4a3 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=13 -license: LGPL-2.1-or-later -size: 134088 -timestamp: 1754905959823 -- conda: https://conda.anaconda.org/conda-forge/linux-64/krb5-1.22.2-ha1258a1_0.conda -sha256: 3e307628ca3527448dd1cb14ad7bb9d04d1d28c7d4c5f97ba196ae984571dd25 -md5: fb53fb07ce46a575c5d004bbc96032c2 -depends: -- __glibc >=2.17,<3.0.a0 -- keyutils >=1.6.3,<2.0a0 -- libedit >=3.1.20250104,<3.2.0a0 -- libedit >=3.1.20250104,<4.0a0 -- libgcc >=14 -- libstdcxx >=14 -- openssl >=3.5.5,<4.0a0 -license: MIT -license_family: MIT -size: 1386730 -timestamp: 1769769569681 -- conda: https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.18-h0c24ade_0.conda -sha256: 836ec4b895352110335b9fdcfa83a8dcdbe6c5fb7c06c4929130600caea91c0a -md5: 6f2e2c8f58160147c4d1c6f4c14cbac4 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -- libjpeg-turbo >=3.1.2,<4.0a0 -- libtiff >=4.7.1,<4.8.0a0 -license: MIT -license_family: MIT -size: 249959 -timestamp: 1768184673131 -- conda: https://conda.anaconda.org/conda-forge/linux-64/lerc-4.1.0-hdb68285_0.conda -sha256: f84cb54782f7e9cea95e810ea8fef186e0652d0fa73d3009914fa2c1262594e1 -md5: a752488c68f2e7c456bcbd8f16eec275 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -- 
libstdcxx >=14 -license: Apache-2.0 -license_family: Apache -size: 261513 -timestamp: 1773113328888 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h7a8fb5f_6.conda -sha256: 205c4f19550f3647832ec44e35e6d93c8c206782bdd620c1d7cf66237580ff9c -md5: 49c553b47ff679a6a1e9fc80b9c5a2d4 -depends: -- __glibc >=2.17,<3.0.a0 -- krb5 >=1.22.2,<1.23.0a0 -- libgcc >=14 -- libstdcxx >=14 -- libzlib >=1.3.1,<2.0a0 -license: Apache-2.0 -license_family: Apache -size: 4518030 -timestamp: 1770902209173 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.25-h17f619e_0.conda -sha256: aa8e8c4be9a2e81610ddf574e05b64ee131fab5e0e3693210c9d6d2fba32c680 -md5: 6c77a605a7a689d17d4819c0f8ac9a00 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -license: MIT -license_family: MIT -size: 73490 -timestamp: 1761979956660 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20250104-pl5321h7949ede_0.conda -sha256: d789471216e7aba3c184cd054ed61ce3f6dac6f87a50ec69291b9297f8c18724 -md5: c277e0a4d549b03ac1e9d6cbbe3d017b -depends: -- ncurses -- __glibc >=2.17,<3.0.a0 -- libgcc >=13 -- ncurses >=6.5,<7.0a0 -license: BSD-2-Clause -license_family: BSD -size: 134676 -timestamp: 1738479519902 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.4-hecca717_0.conda -sha256: d78f1d3bea8c031d2f032b760f36676d87929b18146351c4464c66b0869df3f5 -md5: e7f7ce06ec24cfcfb9e36d28cf82ba57 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -constrains: -- expat 2.7.4.* -license: MIT -license_family: MIT -size: 76798 -timestamp: 1771259418166 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libffi-3.5.2-h3435931_0.conda -sha256: 31f19b6a88ce40ebc0d5a992c131f57d919f73c0b92cd1617a5bec83f6e961e6 -md5: a360c33a5abe61c07959e449fa1453eb -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -license: MIT -license_family: MIT -size: 58592 -timestamp: 1769456073053 -- conda: 
https://conda.anaconda.org/conda-forge/linux-64/libfreetype-2.14.3-ha770c72_0.conda -sha256: 38f014a7129e644636e46064ecd6b1945e729c2140e21d75bb476af39e692db2 -md5: e289f3d17880e44b633ba911d57a321b -depends: -- libfreetype6 >=2.14.3 -license: GPL-2.0-only OR FTL -size: 8049 -timestamp: 1774298163029 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libfreetype6-2.14.3-h73754d4_0.conda -sha256: 16f020f96da79db1863fcdd8f2b8f4f7d52f177dd4c58601e38e9182e91adf1d -md5: fb16b4b69e3f1dcfe79d80db8fd0c55d -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -- libpng >=1.6.55,<1.7.0a0 -- libzlib >=1.3.2,<2.0a0 -constrains: -- freetype >=2.14.3 -license: GPL-2.0-only OR FTL -size: 384575 -timestamp: 1774298162622 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-15.2.0-he0feb66_18.conda -sha256: faf7d2017b4d718951e3a59d081eb09759152f93038479b768e3d612688f83f5 -md5: 0aa00f03f9e39fb9876085dee11a85d4 -depends: -- __glibc >=2.17,<3.0.a0 -- _openmp_mutex >=4.5 -constrains: -- libgcc-ng ==15.2.0=*_18 -- libgomp 15.2.0 he0feb66_18 -license: GPL-3.0-only WITH GCC-exception-3.1 -license_family: GPL -size: 1041788 -timestamp: 1771378212382 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-15.2.0-h69a702a_18.conda -sha256: e318a711400f536c81123e753d4c797a821021fb38970cebfb3f454126016893 -md5: d5e96b1ed75ca01906b3d2469b4ce493 -depends: -- libgcc 15.2.0 he0feb66_18 -license: GPL-3.0-only WITH GCC-exception-3.1 -license_family: GPL -size: 27526 -timestamp: 1771378224552 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libglib-2.86.4-h6548e54_1.conda -sha256: a27e44168a1240b15659888ce0d9b938ed4bdb49e9ea68a7c1ff27bcea8b55ce -md5: bb26456332b07f68bf3b7622ed71c0da -depends: -- __glibc >=2.17,<3.0.a0 -- libffi >=3.5.2,<3.6.0a0 -- libgcc >=14 -- libiconv >=1.18,<2.0a0 -- libzlib >=1.3.1,<2.0a0 -- pcre2 >=10.47,<10.48.0a0 -constrains: -- glib 2.86.4 *_1 -license: LGPL-2.1-or-later -size: 4398701 -timestamp: 1771863239578 -- conda: 
https://conda.anaconda.org/conda-forge/linux-64/libgomp-15.2.0-he0feb66_18.conda -sha256: 21337ab58e5e0649d869ab168d4e609b033509de22521de1bfed0c031bfc5110 -md5: 239c5e9546c38a1e884d69effcf4c882 -depends: -- __glibc >=2.17,<3.0.a0 -license: GPL-3.0-only WITH GCC-exception-3.1 -license_family: GPL -size: 603262 -timestamp: 1771378117851 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.18-h3b78370_2.conda -sha256: c467851a7312765447155e071752d7bf9bf44d610a5687e32706f480aad2833f -md5: 915f5995e94f60e9a4826e0b0920ee88 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -license: LGPL-2.1-only -size: 790176 -timestamp: 1754908768807 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.1.2-hb03c661_0.conda -sha256: cc9aba923eea0af8e30e0f94f2ad7156e2984d80d1e8e7fe6be5a1f257f0eb32 -md5: 8397539e3a0bbd1695584fb4f927485a -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -constrains: -- jpeg <0.0.0a -license: IJG AND BSD-3-Clause AND Zlib -size: 633710 -timestamp: 1762094827865 -- conda: https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.2-hb03c661_0.conda -sha256: 755c55ebab181d678c12e49cced893598f2bab22d582fbbf4d8b83c18be207eb -md5: c7c83eecbb72d88b940c249af56c8b17 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -constrains: -- xz 5.8.2.* -license: 0BSD -size: 113207 -timestamp: 1768752626120 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.55-h421ea60_0.conda -sha256: 36ade759122cdf0f16e2a2562a19746d96cf9c863ffaa812f2f5071ebbe9c03c -md5: 5f13ffc7d30ffec87864e678df9957b4 -depends: -- libgcc >=14 -- __glibc >=2.17,<3.0.a0 -- libzlib >=1.3.1,<2.0a0 -license: zlib-acknowledgement -size: 317669 -timestamp: 1770691470744 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-15.2.0-h934c35e_18.conda -sha256: 78668020064fdaa27e9ab65cd2997e2c837b564ab26ce3bf0e58a2ce1a525c6e -md5: 1b08cd684f34175e4514474793d44bcb -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc 15.2.0 he0feb66_18 -constrains: -- 
libstdcxx-ng ==15.2.0=*_18 -license: GPL-3.0-only WITH GCC-exception-3.1 -license_family: GPL -size: 5852330 -timestamp: 1771378262446 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.7.1-h9d88235_1.conda -sha256: e5f8c38625aa6d567809733ae04bb71c161a42e44a9fa8227abe61fa5c60ebe0 -md5: cd5a90476766d53e901500df9215e927 -depends: -- __glibc >=2.17,<3.0.a0 -- lerc >=4.0.0,<5.0a0 -- libdeflate >=1.25,<1.26.0a0 -- libgcc >=14 -- libjpeg-turbo >=3.1.0,<4.0a0 -- liblzma >=5.8.1,<6.0a0 -- libstdcxx >=14 -- libwebp-base >=1.6.0,<2.0a0 -- libzlib >=1.3.1,<2.0a0 -- zstd >=1.5.7,<1.6.0a0 -license: HPND -size: 435273 -timestamp: 1762022005702 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.41.3-h5347b49_0.conda -sha256: 1a7539cfa7df00714e8943e18de0b06cceef6778e420a5ee3a2a145773758aee -md5: db409b7c1720428638e7c0d509d3e1b5 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -license: BSD-3-Clause -license_family: BSD -size: 40311 -timestamp: 1766271528534 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.6.0-hd42ef1d_0.conda -sha256: 3aed21ab28eddffdaf7f804f49be7a7d701e8f0e46c856d801270b470820a37b -md5: aea31d2e5b1091feca96fcfe945c3cf9 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -constrains: -- libwebp 1.6.0 -license: BSD-3-Clause -license_family: BSD -size: 429011 -timestamp: 1752159441324 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.17.0-h8a09558_0.conda -sha256: 666c0c431b23c6cec6e492840b176dde533d48b7e6fb8883f5071223433776aa -md5: 92ed62436b625154323d40d5f2f11dd7 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=13 -- pthread-stubs -- xorg-libxau >=1.0.11,<2.0a0 -- xorg-libxdmcp -license: MIT -license_family: MIT -size: 395888 -timestamp: 1727278577118 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda -sha256: 6ae68e0b86423ef188196fff6207ed0c8195dd84273cb5623b85aa08033a410c -md5: 5aa797f8787fe7a17d1b0821485b5adc -depends: -- libgcc-ng >=12 -license: 
LGPL-2.1-or-later -size: 100393 -timestamp: 1702724383534 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.2-h25fd6f3_2.conda -sha256: 55044c403570f0dc26e6364de4dc5368e5f3fc7ff103e867c487e2b5ab2bcda9 -md5: d87ff7921124eccd67248aa483c23fec -depends: -- __glibc >=2.17,<3.0.a0 -constrains: -- zlib 1.3.2 *_2 -license: Zlib -license_family: Other -size: 63629 -timestamp: 1774072609062 -- conda: https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h2d0b736_3.conda -sha256: 3fde293232fa3fca98635e1167de6b7c7fda83caf24b9d6c91ec9eefb4f4d586 -md5: 47e340acb35de30501a76c7c799c41d7 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=13 -license: X11 AND BSD-3-Clause -size: 891641 -timestamp: 1738195959188 -- conda: https://conda.anaconda.org/conda-forge/linux-64/openjdk-25.0.2-ha668962_0.conda -sha256: 3825a4c84676a8a5cc23b397a2911e4efa4a805daf2af764153bd904e142ec41 -md5: a41092b0177362dbe5eb2a18501e86c0 -depends: -- xorg-libx11 -- xorg-libxext -- xorg-libxi -- xorg-libxrender -- xorg-libxtst -- libstdcxx >=14 -- libgcc >=14 -- __glibc >=2.17,<3.0.a0 -- libfreetype >=2.14.1 -- libfreetype6 >=2.14.1 -- xorg-libxrender >=0.9.12,<0.10.0a0 -- libjpeg-turbo >=3.1.2,<4.0a0 -- giflib >=5.2.2,<5.3.0a0 -- xorg-libxrandr >=1.5.5,<2.0a0 -- harfbuzz >=12.3.2 -- fontconfig >=2.17.1,<3.0a0 -- fonts-conda-ecosystem -- xorg-libxtst >=1.2.5,<2.0a0 -- xorg-libxi >=1.8.2,<2.0a0 -- lcms2 >=2.18,<3.0a0 -- alsa-lib >=1.2.15.3,<1.3.0a0 -- libpng >=1.6.55,<1.7.0a0 -- xorg-libxt >=1.3.1,<2.0a0 -- libzlib >=1.3.1,<2.0a0 -- xorg-libxext >=1.3.7,<2.0a0 -- xorg-libx11 >=1.8.13,<2.0a0 -- libcups >=2.3.3,<2.4.0a0 -license: GPL-2.0-or-later WITH Classpath-exception-2.0 -license_family: GPL -size: 122465031 -timestamp: 1771443671180 -- conda: https://conda.anaconda.org/conda-forge/linux-64/openssl-3.6.1-h35e630c_1.conda -sha256: 44c877f8af015332a5d12f5ff0fb20ca32f896526a7d0cdb30c769df1144fb5c -md5: f61eb8cd60ff9057122a3d338b99c00f -depends: -- __glibc >=2.17,<3.0.a0 -- ca-certificates 
-- libgcc >=14 -license: Apache-2.0 -license_family: Apache -size: 3164551 -timestamp: 1769555830639 -- conda: https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.47-haa7fec5_0.conda -sha256: 5e6f7d161356fefd981948bea5139c5aa0436767751a6930cb1ca801ebb113ff -md5: 7a3bff861a6583f1889021facefc08b1 -depends: -- __glibc >=2.17,<3.0.a0 -- bzip2 >=1.0.8,<2.0a0 -- libgcc >=14 -- libzlib >=1.3.1,<2.0a0 -license: BSD-3-Clause -license_family: BSD -size: 1222481 -timestamp: 1763655398280 -- conda: https://conda.anaconda.org/conda-forge/linux-64/perl-5.32.1-7_hd590300_perl5.conda -build_number: 7 -sha256: 9ec32b6936b0e37bcb0ed34f22ec3116e75b3c0964f9f50ecea5f58734ed6ce9 -md5: f2cfec9406850991f4e3d960cc9e3321 -depends: -- libgcc-ng >=12 -- libxcrypt >=4.4.36 -license: GPL-1.0-or-later OR Artistic-1.0-Perl -size: 13344463 -timestamp: 1703310653947 -- conda: https://conda.anaconda.org/conda-forge/linux-64/pixman-0.46.4-h54a6638_1.conda -sha256: 43d37bc9ca3b257c5dd7bf76a8426addbdec381f6786ff441dc90b1a49143b6a -md5: c01af13bdc553d1a8fbfff6e8db075f0 -depends: -- libgcc >=14 -- libstdcxx >=14 -- libgcc >=14 -- __glibc >=2.17,<3.0.a0 -license: MIT -license_family: MIT -size: 450960 -timestamp: 1754665235234 -- conda: https://conda.anaconda.org/conda-forge/linux-64/procps-ng-4.0.6-h18c060e_0.conda -sha256: 4ce2e1ee31a6217998f78c31ce7dc0a3e0557d9238b51d49dd20c52d467a126d -md5: f2c23a77b25efcad57d377b34bd84941 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -- ncurses >=6.5,<7.0a0 -license: GPL-2.0-or-later AND LGPL-2.0-or-later -license_family: GPL -size: 593603 -timestamp: 1769710381284 -- conda: https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-hb9d3cd8_1002.conda -sha256: 9c88f8c64590e9567c6c80823f0328e58d3b1efb0e1c539c0315ceca764e0973 -md5: b3c17d95b5a10c6e64a21fa17573e70e -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=13 -license: MIT -license_family: MIT -size: 8252 -timestamp: 1726802366959 -- conda: 
https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.2-hb9d3cd8_0.conda -sha256: c12396aabb21244c212e488bbdc4abcdef0b7404b15761d9329f5a4a39113c4b -md5: fb901ff28063514abb6046c9ec2c4a45 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=13 -license: MIT -license_family: MIT -size: 58628 -timestamp: 1734227592886 -- conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.6-he73a12e_0.conda -sha256: 277841c43a39f738927145930ff963c5ce4c4dacf66637a3d95d802a64173250 -md5: 1c74ff8c35dcadf952a16f752ca5aa49 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=13 -- libuuid >=2.38.1,<3.0a0 -- xorg-libice >=1.1.2,<2.0a0 -license: MIT -license_family: MIT -size: 27590 -timestamp: 1741896361728 -- conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.13-he1eb515_0.conda -sha256: 516d4060139dbb4de49a4dcdc6317a9353fb39ebd47789c14e6fe52de0deee42 -md5: 861fb6ccbc677bb9a9fb2468430b9c6a -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -- libxcb >=1.17.0,<2.0a0 -license: MIT -license_family: MIT -size: 839652 -timestamp: 1770819209719 -- conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.12-hb03c661_1.conda -sha256: 6bc6ab7a90a5d8ac94c7e300cc10beb0500eeba4b99822768ca2f2ef356f731b -md5: b2895afaf55bf96a8c8282a2e47a5de0 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -license: MIT -license_family: MIT -size: 15321 -timestamp: 1762976464266 -- conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.5-hb03c661_1.conda -sha256: 25d255fb2eef929d21ff660a0c687d38a6d2ccfbcbf0cc6aa738b12af6e9d142 -md5: 1dafce8548e38671bea82e3f5c6ce22f -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -license: MIT -license_family: MIT -size: 20591 -timestamp: 1762976546182 -- conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.7-hb03c661_0.conda -sha256: 79c60fc6acfd3d713d6340d3b4e296836a0f8c51602327b32794625826bd052f -md5: 34e54f03dfea3e7a2dcf1453a85f1085 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -- 
xorg-libx11 >=1.8.12,<2.0a0 -license: MIT -license_family: MIT -size: 50326 -timestamp: 1769445253162 -- conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxfixes-6.0.2-hb03c661_0.conda -sha256: 83c4c99d60b8784a611351220452a0a85b080668188dce5dfa394b723d7b64f4 -md5: ba231da7fccf9ea1e768caf5c7099b84 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -- xorg-libx11 >=1.8.12,<2.0a0 -license: MIT -license_family: MIT -size: 20071 -timestamp: 1759282564045 -- conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxi-1.8.2-hb9d3cd8_0.conda -sha256: 1a724b47d98d7880f26da40e45f01728e7638e6ec69f35a3e11f92acd05f9e7a -md5: 17dcc85db3c7886650b8908b183d6876 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=13 -- xorg-libx11 >=1.8.10,<2.0a0 -- xorg-libxext >=1.3.6,<2.0a0 -- xorg-libxfixes >=6.0.1,<7.0a0 -license: MIT -license_family: MIT -size: 47179 -timestamp: 1727799254088 -- conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrandr-1.5.5-hb03c661_0.conda -sha256: 80ed047a5cb30632c3dc5804c7716131d767089f65877813d4ae855ee5c9d343 -md5: e192019153591938acf7322b6459d36e -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -- xorg-libx11 >=1.8.12,<2.0a0 -- xorg-libxext >=1.3.6,<2.0a0 -- xorg-libxrender >=0.9.12,<0.10.0a0 -license: MIT -license_family: MIT -size: 30456 -timestamp: 1769445263457 -- conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.12-hb9d3cd8_0.conda -sha256: 044c7b3153c224c6cedd4484dd91b389d2d7fd9c776ad0f4a34f099b3389f4a1 -md5: 96d57aba173e878a2089d5638016dc5e -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=13 -- xorg-libx11 >=1.8.10,<2.0a0 -license: MIT -license_family: MIT -size: 33005 -timestamp: 1734229037766 -- conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxt-1.3.1-hb9d3cd8_0.conda -sha256: a8afba4a55b7b530eb5c8ad89737d60d60bc151a03fbef7a2182461256953f0e -md5: 279b0de5f6ba95457190a1c459a64e31 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=13 -- xorg-libice >=1.1.1,<2.0a0 -- xorg-libsm 
>=1.2.4,<2.0a0 -- xorg-libx11 >=1.8.10,<2.0a0 -license: MIT -license_family: MIT -size: 379686 -timestamp: 1731860547604 -- conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxtst-1.2.5-hb9d3cd8_3.conda -sha256: 752fdaac5d58ed863bbf685bb6f98092fe1a488ea8ebb7ed7b606ccfce08637a -md5: 7bbe9a0cc0df0ac5f5a8ad6d6a11af2f -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=13 -- xorg-libx11 >=1.8.10,<2.0a0 -- xorg-libxext >=1.3.6,<2.0a0 -- xorg-libxi >=1.7.10,<2.0a0 -license: MIT -license_family: MIT -size: 32808 -timestamp: 1727964811275 -- conda: https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb78ec9c_6.conda -sha256: 68f0206ca6e98fea941e5717cec780ed2873ffabc0e1ed34428c061e2c6268c7 -md5: 4a13eeac0b5c8e5b8ab496e6c4ddd829 -depends: -- __glibc >=2.17,<3.0.a0 -- libzlib >=1.3.1,<2.0a0 -license: BSD-3-Clause -license_family: BSD -size: 601375 -timestamp: 1764777111296 diff --git a/modules/nf-core/fastqc/.conda-lock/linux_arm64-bd-e455e32f745abe68_1.txt b/modules/nf-core/fastqc/.conda-lock/linux_arm64-bd-e455e32f745abe68_1.txt deleted file mode 100644 index cdc434ca..00000000 --- a/modules/nf-core/fastqc/.conda-lock/linux_arm64-bd-e455e32f745abe68_1.txt +++ /dev/null @@ -1,769 +0,0 @@ - -version: 6 -environments: -default: -channels: -- url: https://conda.anaconda.org/conda-forge/ -- url: https://conda.anaconda.org/bioconda/ -- url: https://conda.anaconda.org/bioconda/ -options: -pypi-prerelease-mode: if-necessary-or-explicit -packages: -linux-aarch64: -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/_openmp_mutex-4.5-20_gnu.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/alsa-lib-1.2.15.3-he30d5cf_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/bzip2-1.0.8-h4777abc_9.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2026.2.25-hbd8a1cb_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cairo-1.18.4-h0b6afd8_1.conda -- conda: 
https://conda.anaconda.org/bioconda/noarch/fastqc-0.12.1-hdfd78af_0.tar.bz2 -- conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2 -- conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2 -- conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2 -- conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_3.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fontconfig-2.17.1-hba86a56_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2 -- conda: https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-hc364b38_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/giflib-5.2.2-h31becfc_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/graphite2-1.3.14-hfae3067_2.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/harfbuzz-13.2.1-h1134a53_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/icu-78.3-hcab7f73_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/keyutils-1.6.3-h86ecc28_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/krb5-1.22.2-hfd895c2_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/lcms2-2.18-h9d5b58d_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/lerc-4.1.0-h52b7260_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcups-2.3.3-h4f2b762_6.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libdeflate-1.25-h1af38f5_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libedit-3.1.20250104-pl5321h976ea20_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libexpat-2.7.4-hfae3067_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libffi-3.5.2-h376a255_0.conda -- 
conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libfreetype-2.14.3-h8af1aa0_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libfreetype6-2.14.3-hdae7a39_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-15.2.0-h8acb6b2_18.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-ng-15.2.0-he9431aa_18.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libglib-2.86.4-hf53f6bf_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgomp-15.2.0-h8acb6b2_18.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libiconv-1.18-h90929bb_2.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libjpeg-turbo-3.1.2-he30d5cf_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/liblzma-5.8.2-he30d5cf_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libpng-1.6.55-h1abf092_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libstdcxx-15.2.0-hef695bb_18.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libtiff-4.7.1-hdb009f0_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libuuid-2.41.3-h1022ec0_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libwebp-base-1.6.0-ha2e29f5_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libxcb-1.17.0-h262b8f6_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libxcrypt-4.4.36-h31becfc_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libzlib-1.3.2-hdc9db2a_2.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/ncurses-6.5-ha32ae93_3.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/openjdk-25.0.2-h488f50d_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/openssl-3.6.1-h546c87b_1.conda -- conda: 
https://conda.anaconda.org/conda-forge/linux-aarch64/pcre2-10.47-hf841c20_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/perl-5.32.1-7_h31becfc_perl5.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/pixman-0.46.4-h7ac5ae9_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/procps-ng-4.0.6-h1779866_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/pthread-stubs-0.4-h86ecc28_1002.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libice-1.1.2-h86ecc28_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libsm-1.2.6-h0808dbd_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libx11-1.8.13-h63a1b12_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxau-1.0.12-he30d5cf_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxdmcp-1.1.5-he30d5cf_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxext-1.3.7-he30d5cf_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxfixes-6.0.2-he30d5cf_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxi-1.8.2-h57736b2_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxrandr-1.5.5-he30d5cf_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxrender-0.9.12-h86ecc28_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxt-1.3.1-h57736b2_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxtst-1.2.5-h57736b2_3.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/zstd-1.5.7-h85ac4a6_6.conda -packages: -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/_openmp_mutex-4.5-20_gnu.conda -build_number: 20 -sha256: a2527b1d81792a0ccd2c05850960df119c2b6d8f5fdec97f2db7d25dc23b1068 -md5: 468fd3bb9e1f671d36c2cbc677e56f1d -depends: 
-- libgomp >=7.5.0 -constrains: -- openmp_impl <0.0a0 -license: BSD-3-Clause -license_family: BSD -size: 28926 -timestamp: 1770939656741 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/alsa-lib-1.2.15.3-he30d5cf_0.conda -sha256: ea2233e2db9908c2e5f29d3ca420a546b4583253f4f70abb5494cdd676866d42 -md5: 4a98cbc4ade694520227402ff8880630 -depends: -- libgcc >=14 -license: LGPL-2.1-or-later -license_family: GPL -size: 615729 -timestamp: 1768327548407 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/bzip2-1.0.8-h4777abc_9.conda -sha256: b3495077889dde6bb370938e7db82be545c73e8589696ad0843a32221520ad4c -md5: 840d8fc0d7b3209be93080bc20e07f2d -depends: -- libgcc >=14 -license: bzip2-1.0.6 -license_family: BSD -size: 192412 -timestamp: 1771350241232 -- conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2026.2.25-hbd8a1cb_0.conda -sha256: 67cc7101b36421c5913a1687ef1b99f85b5d6868da3abbf6ec1a4181e79782fc -md5: 4492fd26db29495f0ba23f146cd5638d -depends: -- __unix -license: ISC -size: 147413 -timestamp: 1772006283803 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cairo-1.18.4-h0b6afd8_1.conda -sha256: 675db823f3d6fb6bf747fab3b0170ba99b269a07cf6df1e49fff2f9972be9cd1 -md5: 043c13ed3a18396994be9b4fab6572ad -depends: -- fontconfig >=2.15.0,<3.0a0 -- fonts-conda-ecosystem -- icu >=78.1,<79.0a0 -- libexpat >=2.7.3,<3.0a0 -- libfreetype >=2.14.1 -- libfreetype6 >=2.14.1 -- libgcc >=14 -- libglib >=2.86.3,<3.0a0 -- libpng >=1.6.53,<1.7.0a0 -- libstdcxx >=14 -- libxcb >=1.17.0,<2.0a0 -- libzlib >=1.3.1,<2.0a0 -- pixman >=0.46.4,<1.0a0 -- xorg-libice >=1.1.2,<2.0a0 -- xorg-libsm >=1.2.6,<2.0a0 -- xorg-libx11 >=1.8.12,<2.0a0 -- xorg-libxext >=1.3.6,<2.0a0 -- xorg-libxrender >=0.9.12,<0.10.0a0 -license: LGPL-2.1-only or MPL-1.1 -size: 927045 -timestamp: 1766416003626 -- conda: https://conda.anaconda.org/bioconda/noarch/fastqc-0.12.1-hdfd78af_0.tar.bz2 -sha256: 7cc26225d590540ae95cd24940ff42f2da7479dd4cd22ae9ab9298665d06790c -md5: 
c9f6a4b12229f7331f79c9a00dd6e240 -depends: -- font-ttf-dejavu-sans-mono -- fontconfig -- openjdk >=8.0.144 -- perl -license: GPL >=3 -size: 11664291 -timestamp: 1677946722445 -- conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2 -sha256: 58d7f40d2940dd0a8aa28651239adbf5613254df0f75789919c4e6762054403b -md5: 0c96522c6bdaed4b1566d11387caaf45 -license: BSD-3-Clause -license_family: BSD -size: 397370 -timestamp: 1566932522327 -- conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2 -sha256: c52a29fdac682c20d252facc50f01e7c2e7ceac52aa9817aaf0bb83f7559ec5c -md5: 34893075a5c9e55cdafac56607368fc6 -license: OFL-1.1 -license_family: Other -size: 96530 -timestamp: 1620479909603 -- conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2 -sha256: 00925c8c055a2275614b4d983e1df637245e19058d79fc7dd1a93b8d9fb4b139 -md5: 4d59c254e01d9cde7957100457e2d5fb -license: OFL-1.1 -license_family: Other -size: 700814 -timestamp: 1620479612257 -- conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_3.conda -sha256: 2821ec1dc454bd8b9a31d0ed22a7ce22422c0aef163c59f49dfdf915d0f0ca14 -md5: 49023d73832ef61042f6a237cb2687e7 -license: LicenseRef-Ubuntu-Font-Licence-Version-1.0 -license_family: Other -size: 1620504 -timestamp: 1727511233259 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fontconfig-2.17.1-hba86a56_0.conda -sha256: 835aff8615dd8d8fff377679710ce81b8a2c47b6404e21a92fb349fda193a15c -md5: 0fed1ff55f4938a65907f3ecf62609db -depends: -- libexpat >=2.7.4,<3.0a0 -- libfreetype >=2.14.1 -- libfreetype6 >=2.14.1 -- libgcc >=14 -- libuuid >=2.41.3,<3.0a0 -- libzlib >=1.3.1,<2.0a0 -license: MIT -license_family: MIT -size: 279044 -timestamp: 1771382728182 -- conda: https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2 -sha256: 
a997f2f1921bb9c9d76e6fa2f6b408b7fa549edd349a77639c9fe7a23ea93e61 -md5: fee5683a3f04bd15cbd8318b096a27ab -depends: -- fonts-conda-forge -license: BSD-3-Clause -license_family: BSD -size: 3667 -timestamp: 1566974674465 -- conda: https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-hc364b38_1.conda -sha256: 54eea8469786bc2291cc40bca5f46438d3e062a399e8f53f013b6a9f50e98333 -md5: a7970cd949a077b7cb9696379d338681 -depends: -- font-ttf-ubuntu -- font-ttf-inconsolata -- font-ttf-dejavu-sans-mono -- font-ttf-source-code-pro -license: BSD-3-Clause -license_family: BSD -size: 4059 -timestamp: 1762351264405 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/giflib-5.2.2-h31becfc_0.conda -sha256: a79dc3bd54c4fb1f249942ee2d5b601a76ecf9614774a4cff9af49adfa458db2 -md5: 2f809afaf0ba1ea4135dce158169efac -depends: -- libgcc-ng >=12 -license: MIT -license_family: MIT -size: 82124 -timestamp: 1712692444545 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/graphite2-1.3.14-hfae3067_2.conda -sha256: c9b1781fe329e0b77c5addd741e58600f50bef39321cae75eba72f2f381374b7 -md5: 4aa540e9541cc9d6581ab23ff2043f13 -depends: -- libgcc >=14 -- libstdcxx >=14 -license: LGPL-2.0-or-later -license_family: LGPL -size: 102400 -timestamp: 1755102000043 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/harfbuzz-13.2.1-h1134a53_0.conda -sha256: e22f485fddaaea3ff4b6cae98e0197b9dccd2ed2770337ad6ff38a92afe04e59 -md5: 05d65a2cf410adc331c9ea61f59f1013 -depends: -- cairo >=1.18.4,<2.0a0 -- graphite2 >=1.3.14,<2.0a0 -- icu >=78.3,<79.0a0 -- libexpat >=2.7.4,<3.0a0 -- libfreetype >=2.14.2 -- libfreetype6 >=2.14.2 -- libgcc >=14 -- libglib >=2.86.4,<3.0a0 -- libstdcxx >=14 -- libzlib >=1.3.2,<2.0a0 -license: MIT -license_family: MIT -size: 2345732 -timestamp: 1774281448329 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/icu-78.3-hcab7f73_0.conda -sha256: 49ba6aed2c6b482bb0ba41078057555d29764299bc947b990708617712ef6406 -md5: 
546da38c2fa9efacf203e2ad3f987c59 -depends: -- libgcc >=14 -- libstdcxx >=14 -license: MIT -license_family: MIT -size: 12837286 -timestamp: 1773822650615 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/keyutils-1.6.3-h86ecc28_0.conda -sha256: 5ce830ca274b67de11a7075430a72020c1fb7d486161a82839be15c2b84e9988 -md5: e7df0aab10b9cbb73ab2a467ebfaf8c7 -depends: -- libgcc >=13 -license: LGPL-2.1-or-later -size: 129048 -timestamp: 1754906002667 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/krb5-1.22.2-hfd895c2_0.conda -sha256: b53999d888dda53c506b264e8c02b5f5c8e022c781eda0718f007339e6bc90ba -md5: d9ca108bd680ea86a963104b6b3e95ca -depends: -- keyutils >=1.6.3,<2.0a0 -- libedit >=3.1.20250104,<3.2.0a0 -- libedit >=3.1.20250104,<4.0a0 -- libgcc >=14 -- libstdcxx >=14 -- openssl >=3.5.5,<4.0a0 -license: MIT -license_family: MIT -size: 1517436 -timestamp: 1769773395215 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/lcms2-2.18-h9d5b58d_0.conda -sha256: 379ef5e91a587137391a6149755d0e929f1a007d2dcb211318ac670a46c8596f -md5: bb960f01525b5e001608afef9d47b79c -depends: -- libgcc >=14 -- libjpeg-turbo >=3.1.2,<4.0a0 -- libtiff >=4.7.1,<4.8.0a0 -license: MIT -license_family: MIT -size: 293039 -timestamp: 1768184778398 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/lerc-4.1.0-h52b7260_0.conda -sha256: 8957fd460c1c132c8031f65fd5f56ec3807fd71b7cab2c5e2b0937b13404ab36 -md5: d13423b06447113a90b5b1366d4da171 -depends: -- libgcc >=14 -- libstdcxx >=14 -license: Apache-2.0 -license_family: Apache -size: 240444 -timestamp: 1773114901155 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcups-2.3.3-h4f2b762_6.conda -sha256: 41b04f995c9f63af8c4065a35931e46cbc2fdd6b9bf7e4c19f90d53cbb2bc8e5 -md5: 67828c963b17db7dc989fe5d509ef04a -depends: -- krb5 >=1.22.2,<1.23.0a0 -- libgcc >=14 -- libstdcxx >=14 -- libzlib >=1.3.1,<2.0a0 -license: Apache-2.0 -license_family: Apache -size: 4553739 -timestamp: 1770903929794 -- conda: 
https://conda.anaconda.org/conda-forge/linux-aarch64/libdeflate-1.25-h1af38f5_0.conda -sha256: 48814b73bd462da6eed2e697e30c060ae16af21e9fbed30d64feaf0aad9da392 -md5: a9138815598fe6b91a1d6782ca657b0c -depends: -- libgcc >=14 -license: MIT -license_family: MIT -size: 71117 -timestamp: 1761979776756 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libedit-3.1.20250104-pl5321h976ea20_0.conda -sha256: c0b27546aa3a23d47919226b3a1635fccdb4f24b94e72e206a751b33f46fd8d6 -md5: fb640d776fc92b682a14e001980825b1 -depends: -- ncurses -- libgcc >=13 -- ncurses >=6.5,<7.0a0 -license: BSD-2-Clause -license_family: BSD -size: 148125 -timestamp: 1738479808948 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libexpat-2.7.4-hfae3067_0.conda -sha256: 995ce3ad96d0f4b5ed6296b051a0d7b6377718f325bc0e792fbb96b0e369dad7 -md5: 57f3b3da02a50a1be2a6fe847515417d -depends: -- libgcc >=14 -constrains: -- expat 2.7.4.* -license: MIT -license_family: MIT -size: 76564 -timestamp: 1771259530958 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libffi-3.5.2-h376a255_0.conda -sha256: 3df4c539449aabc3443bbe8c492c01d401eea894603087fca2917aa4e1c2dea9 -md5: 2f364feefb6a7c00423e80dcb12db62a -depends: -- libgcc >=14 -license: MIT -license_family: MIT -size: 55952 -timestamp: 1769456078358 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libfreetype-2.14.3-h8af1aa0_0.conda -sha256: 752e4f66283d7deb4c6fd47d88df644d8daa2aaa825a54f3bf350a625190192a -md5: a229e22d4d8814a07702b0919d8e6701 -depends: -- libfreetype6 >=2.14.3 -license: GPL-2.0-only OR FTL -size: 8125 -timestamp: 1774301094057 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libfreetype6-2.14.3-hdae7a39_0.conda -sha256: 8e6b27fe4eec4c2fa7b7769a21973734c8dba1de80086fb0213e58375ac09f4c -md5: b99ed99e42dafb27889483b3098cace7 -depends: -- libgcc >=14 -- libpng >=1.6.55,<1.7.0a0 -- libzlib >=1.3.2,<2.0a0 -constrains: -- freetype >=2.14.3 -license: GPL-2.0-only OR FTL -size: 422941 
-timestamp: 1774301093473 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-15.2.0-h8acb6b2_18.conda -sha256: 43df385bedc1cab11993c4369e1f3b04b4ca5d0ea16cba6a0e7f18dbc129fcc9 -md5: 552567ea2b61e3a3035759b2fdb3f9a6 -depends: -- _openmp_mutex >=4.5 -constrains: -- libgcc-ng ==15.2.0=*_18 -- libgomp 15.2.0 h8acb6b2_18 -license: GPL-3.0-only WITH GCC-exception-3.1 -license_family: GPL -size: 622900 -timestamp: 1771378128706 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-ng-15.2.0-he9431aa_18.conda -sha256: 83bb0415f59634dccfa8335d4163d1f6db00a27b36666736f9842b650b92cf2f -md5: 4feebd0fbf61075a1a9c2e9b3936c257 -depends: -- libgcc 15.2.0 h8acb6b2_18 -license: GPL-3.0-only WITH GCC-exception-3.1 -license_family: GPL -size: 27568 -timestamp: 1771378136019 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libglib-2.86.4-hf53f6bf_1.conda -sha256: afc503dbd04a5bf2709aa9d8318a03a8c4edb389f661ff280c3494bfef4341ec -md5: 4ac4372fc4d7f20630a91314cdac8afd -depends: -- libffi >=3.5.2,<3.6.0a0 -- libgcc >=14 -- libiconv >=1.18,<2.0a0 -- libzlib >=1.3.1,<2.0a0 -- pcre2 >=10.47,<10.48.0a0 -constrains: -- glib 2.86.4 *_1 -license: LGPL-2.1-or-later -size: 4512186 -timestamp: 1771863220969 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgomp-15.2.0-h8acb6b2_18.conda -sha256: fc716f11a6a8525e27a5d332ef6a689210b0d2a4dd1133edc0f530659aa9faa6 -md5: 4faa39bf919939602e594253bd673958 -license: GPL-3.0-only WITH GCC-exception-3.1 -license_family: GPL -size: 588060 -timestamp: 1771378040807 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libiconv-1.18-h90929bb_2.conda -sha256: 1473451cd282b48d24515795a595801c9b65b567fe399d7e12d50b2d6cdb04d9 -md5: 5a86bf847b9b926f3a4f203339748d78 -depends: -- libgcc >=14 -license: LGPL-2.1-only -size: 791226 -timestamp: 1754910975665 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libjpeg-turbo-3.1.2-he30d5cf_0.conda -sha256: 
84064c7c53a64291a585d7215fe95ec42df74203a5bf7615d33d49a3b0f08bb6 -md5: 5109d7f837a3dfdf5c60f60e311b041f -depends: -- libgcc >=14 -constrains: -- jpeg <0.0.0a -license: IJG AND BSD-3-Clause AND Zlib -size: 691818 -timestamp: 1762094728337 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/liblzma-5.8.2-he30d5cf_0.conda -sha256: 843c46e20519651a3e357a8928352b16c5b94f4cd3d5481acc48be2e93e8f6a3 -md5: 96944e3c92386a12755b94619bae0b35 -depends: -- libgcc >=14 -constrains: -- xz 5.8.2.* -license: 0BSD -size: 125916 -timestamp: 1768754941722 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libpng-1.6.55-h1abf092_0.conda -sha256: c7378c6b79de4d571d00ad1caf0a4c19d43c9c94077a761abb6ead44d891f907 -md5: be4088903b94ea297975689b3c3aeb27 -depends: -- libgcc >=14 -- libzlib >=1.3.1,<2.0a0 -license: zlib-acknowledgement -size: 340156 -timestamp: 1770691477245 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libstdcxx-15.2.0-hef695bb_18.conda -sha256: 31fdb9ffafad106a213192d8319b9f810e05abca9c5436b60e507afb35a6bc40 -md5: f56573d05e3b735cb03efeb64a15f388 -depends: -- libgcc 15.2.0 h8acb6b2_18 -constrains: -- libstdcxx-ng ==15.2.0=*_18 -license: GPL-3.0-only WITH GCC-exception-3.1 -license_family: GPL -size: 5541411 -timestamp: 1771378162499 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libtiff-4.7.1-hdb009f0_1.conda -sha256: 7ff79470db39e803e21b8185bc8f19c460666d5557b1378d1b1e857d929c6b39 -md5: 8c6fd84f9c87ac00636007c6131e457d -depends: -- lerc >=4.0.0,<5.0a0 -- libdeflate >=1.25,<1.26.0a0 -- libgcc >=14 -- libjpeg-turbo >=3.1.0,<4.0a0 -- liblzma >=5.8.1,<6.0a0 -- libstdcxx >=14 -- libwebp-base >=1.6.0,<2.0a0 -- libzlib >=1.3.1,<2.0a0 -- zstd >=1.5.7,<1.6.0a0 -license: HPND -size: 488407 -timestamp: 1762022048105 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libuuid-2.41.3-h1022ec0_0.conda -sha256: c37a8e89b700646f3252608f8368e7eb8e2a44886b92776e57ad7601fc402a11 -md5: cf2861212053d05f27ec49c3784ff8bb -depends: 
-- libgcc >=14 -license: BSD-3-Clause -license_family: BSD -size: 43453 -timestamp: 1766271546875 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libwebp-base-1.6.0-ha2e29f5_0.conda -sha256: b03700a1f741554e8e5712f9b06dd67e76f5301292958cd3cb1ac8c6fdd9ed25 -md5: 24e92d0942c799db387f5c9d7b81f1af -depends: -- libgcc >=14 -constrains: -- libwebp 1.6.0 -license: BSD-3-Clause -license_family: BSD -size: 359496 -timestamp: 1752160685488 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libxcb-1.17.0-h262b8f6_0.conda -sha256: 461cab3d5650ac6db73a367de5c8eca50363966e862dcf60181d693236b1ae7b -md5: cd14ee5cca2464a425b1dbfc24d90db2 -depends: -- libgcc >=13 -- pthread-stubs -- xorg-libxau >=1.0.11,<2.0a0 -- xorg-libxdmcp -license: MIT -license_family: MIT -size: 397493 -timestamp: 1727280745441 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libxcrypt-4.4.36-h31becfc_1.conda -sha256: 6b46c397644091b8a26a3048636d10b989b1bf266d4be5e9474bf763f828f41f -md5: b4df5d7d4b63579d081fd3a4cf99740e -depends: -- libgcc-ng >=12 -license: LGPL-2.1-or-later -size: 114269 -timestamp: 1702724369203 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libzlib-1.3.2-hdc9db2a_2.conda -sha256: eb111e32e5a7313a5bf799c7fb2419051fa2fe7eff74769fac8d5a448b309f7f -md5: 502006882cf5461adced436e410046d1 -constrains: -- zlib 1.3.2 *_2 -license: Zlib -license_family: Other -size: 69833 -timestamp: 1774072605429 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/ncurses-6.5-ha32ae93_3.conda -sha256: 91cfb655a68b0353b2833521dc919188db3d8a7f4c64bea2c6a7557b24747468 -md5: 182afabe009dc78d8b73100255ee6868 -depends: -- libgcc >=13 -license: X11 AND BSD-3-Clause -size: 926034 -timestamp: 1738196018799 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/openjdk-25.0.2-h488f50d_0.conda -sha256: 6fd2c872b275fa5d42a61a4b6dc28a819cde29f9048adb547363597432e0720e -md5: 27fdd5d67e235c20d23b2d66406497d3 -depends: -- xorg-libx11 -- xorg-libxext -- 
xorg-libxi -- xorg-libxrender -- xorg-libxtst -- libstdcxx >=14 -- libgcc >=14 -- libzlib >=1.3.1,<2.0a0 -- xorg-libxtst >=1.2.5,<2.0a0 -- libpng >=1.6.55,<1.7.0a0 -- alsa-lib >=1.2.15.3,<1.3.0a0 -- xorg-libx11 >=1.8.13,<2.0a0 -- xorg-libxi >=1.8.2,<2.0a0 -- xorg-libxrandr >=1.5.5,<2.0a0 -- lcms2 >=2.18,<3.0a0 -- xorg-libxrender >=0.9.12,<0.10.0a0 -- libcups >=2.3.3,<2.4.0a0 -- libfreetype >=2.14.1 -- libfreetype6 >=2.14.1 -- harfbuzz >=12.3.2 -- xorg-libxext >=1.3.7,<2.0a0 -- giflib >=5.2.2,<5.3.0a0 -- xorg-libxt >=1.3.1,<2.0a0 -- libjpeg-turbo >=3.1.2,<4.0a0 -- fontconfig >=2.17.1,<3.0a0 -- fonts-conda-ecosystem -license: GPL-2.0-or-later WITH Classpath-exception-2.0 -license_family: GPL -size: 106988620 -timestamp: 1771443741031 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/openssl-3.6.1-h546c87b_1.conda -sha256: 7f8048c0e75b2620254218d72b4ae7f14136f1981c5eb555ef61645a9344505f -md5: 25f5885f11e8b1f075bccf4a2da91c60 -depends: -- ca-certificates -- libgcc >=14 -license: Apache-2.0 -license_family: Apache -size: 3692030 -timestamp: 1769557678657 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/pcre2-10.47-hf841c20_0.conda -sha256: 04df2cee95feba440387f33f878e9f655521e69f4be33a0cd637f07d3d81f0f9 -md5: 1a30c42e32ca0ea216bd0bfe6f842f0b -depends: -- bzip2 >=1.0.8,<2.0a0 -- libgcc >=14 -- libzlib >=1.3.1,<2.0a0 -license: BSD-3-Clause -license_family: BSD -size: 1166552 -timestamp: 1763655534263 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/perl-5.32.1-7_h31becfc_perl5.conda -build_number: 7 -sha256: d78296134263b5bf476cad838ded65451e7162db756f9997c5d06b08122572ed -md5: 17d019cb2a6c72073c344e98e40dfd61 -depends: -- libgcc-ng >=12 -- libxcrypt >=4.4.36 -license: GPL-1.0-or-later OR Artistic-1.0-Perl -size: 13338804 -timestamp: 1703310557094 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/pixman-0.46.4-h7ac5ae9_1.conda -sha256: e6b0846a998f2263629cfeac7bca73565c35af13251969f45d385db537a514e4 -md5: 
1587081d537bd4ae77d1c0635d465ba5 -depends: -- libgcc >=14 -- libstdcxx >=14 -- libgcc >=14 -license: MIT -license_family: MIT -size: 357913 -timestamp: 1754665583353 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/procps-ng-4.0.6-h1779866_0.conda -sha256: e9cbcbc94e151ada3d6dc365380aaaf591f65012c16d9a2abaea4b9b90adc402 -md5: ab7288cc39545556d1bc5e71ab2df9a9 -depends: -- libgcc >=14 -- ncurses >=6.5,<7.0a0 -license: GPL-2.0-or-later AND LGPL-2.0-or-later -license_family: GPL -size: 636733 -timestamp: 1769712412683 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/pthread-stubs-0.4-h86ecc28_1002.conda -sha256: 977dfb0cb3935d748521dd80262fe7169ab82920afd38ed14b7fee2ea5ec01ba -md5: bb5a90c93e3bac3d5690acf76b4a6386 -depends: -- libgcc >=13 -license: MIT -license_family: MIT -size: 8342 -timestamp: 1726803319942 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libice-1.1.2-h86ecc28_0.conda -sha256: a2ba1864403c7eb4194dacbfe2777acf3d596feae43aada8d1b478617ce45031 -md5: c8d8ec3e00cd0fd8a231789b91a7c5b7 -depends: -- libgcc >=13 -license: MIT -license_family: MIT -size: 60433 -timestamp: 1734229908988 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libsm-1.2.6-h0808dbd_0.conda -sha256: b86a819cd16f90c01d9d81892155126d01555a20dabd5f3091da59d6309afd0a -md5: 2d1409c50882819cb1af2de82e2b7208 -depends: -- libgcc >=13 -- libuuid >=2.38.1,<3.0a0 -- xorg-libice >=1.1.2,<2.0a0 -license: MIT -license_family: MIT -size: 28701 -timestamp: 1741897678254 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libx11-1.8.13-h63a1b12_0.conda -sha256: cf886160e2ff580d77f7eb8ec1a77c41c2c5b05343e329bc35f0ddf40b8d92ab -md5: 22dd10425ef181e80e130db50675d615 -depends: -- libgcc >=14 -- libxcb >=1.17.0,<2.0a0 -license: MIT -license_family: MIT -size: 869058 -timestamp: 1770819244991 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxau-1.0.12-he30d5cf_1.conda -sha256: 
e9f6e931feeb2f40e1fdbafe41d3b665f1ab6cb39c5880a1fcf9f79a3f3c84a5 -md5: 1c246e1105000c3660558459e2fd6d43 -depends: -- libgcc >=14 -license: MIT -license_family: MIT -size: 16317 -timestamp: 1762977521691 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxdmcp-1.1.5-he30d5cf_1.conda -sha256: 128d72f36bcc8d2b4cdbec07507542e437c7d67f677b7d77b71ed9eeac7d6df1 -md5: bff06dcde4a707339d66d45d96ceb2e2 -depends: -- libgcc >=14 -license: MIT -license_family: MIT -size: 21039 -timestamp: 1762979038025 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxext-1.3.7-he30d5cf_0.conda -sha256: db2188bc0d844d4e9747bac7f6c1d067e390bd769c5ad897c93f1df759dc5dba -md5: fb42b683034619915863d68dd9df03a3 -depends: -- libgcc >=14 -- xorg-libx11 >=1.8.12,<2.0a0 -license: MIT -license_family: MIT -size: 52409 -timestamp: 1769446753771 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxfixes-6.0.2-he30d5cf_0.conda -sha256: 8cb9c88e25c57e47419e98f04f9ef3154ad96b9f858c88c570c7b91216a64d0e -md5: e8b4056544341daf1d415eaeae7a040c -depends: -- libgcc >=14 -- xorg-libx11 >=1.8.12,<2.0a0 -license: MIT -license_family: MIT -size: 20704 -timestamp: 1759284028146 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxi-1.8.2-h57736b2_0.conda -sha256: 7b587407ecb9ccd2bbaf0fb94c5dbdde4d015346df063e9502dc0ce2b682fb5e -md5: eeee3bdb31c6acde2b81ad1b8c287087 -depends: -- libgcc >=13 -- xorg-libx11 >=1.8.9,<2.0a0 -- xorg-libxext >=1.3.6,<2.0a0 -- xorg-libxfixes >=6.0.1,<7.0a0 -license: MIT -license_family: MIT -size: 48197 -timestamp: 1727801059062 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxrandr-1.5.5-he30d5cf_0.conda -sha256: 9f5196665a8d72f4f119c40dcc4bafeb0b540b102cc7b8b299c2abf599e7919f -md5: 1f64c613f0b8d67e9fb0e165d898fb6b -depends: -- libgcc >=14 -- xorg-libx11 >=1.8.12,<2.0a0 -- xorg-libxext >=1.3.6,<2.0a0 -- xorg-libxrender >=0.9.12,<0.10.0a0 -license: MIT -license_family: MIT -size: 31122 
-timestamp: 1769445286951 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxrender-0.9.12-h86ecc28_0.conda -sha256: ffd77ee860c9635a28cfda46163dcfe9224dc6248c62404c544ae6b564a0be1f -md5: ae2c2dd0e2d38d249887727db2af960e -depends: -- libgcc >=13 -- xorg-libx11 >=1.8.10,<2.0a0 -license: MIT -license_family: MIT -size: 33649 -timestamp: 1734229123157 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxt-1.3.1-h57736b2_0.conda -sha256: 7c109792b60720809a580612aba7f8eb2a0bd425b9fc078748a9d6ffc97cbfa8 -md5: a9e4852c8e0b68ee783e7240030b696f -depends: -- libgcc >=13 -- xorg-libice >=1.1.1,<2.0a0 -- xorg-libsm >=1.2.4,<2.0a0 -- xorg-libx11 >=1.8.9,<2.0a0 -license: MIT -license_family: MIT -size: 384752 -timestamp: 1731860572314 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxtst-1.2.5-h57736b2_3.conda -sha256: 6eaffce5a34fc0a16a21ddeaefb597e792a263b1b0c387c1ce46b0a967d558e1 -md5: c05698071b5c8e0da82a282085845860 -depends: -- libgcc >=13 -- xorg-libx11 >=1.8.9,<2.0a0 -- xorg-libxext >=1.3.6,<2.0a0 -- xorg-libxi >=1.7.10,<2.0a0 -license: MIT -license_family: MIT -size: 33786 -timestamp: 1727964907993 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/zstd-1.5.7-h85ac4a6_6.conda -sha256: 569990cf12e46f9df540275146da567d9c618c1e9c7a0bc9d9cfefadaed20b75 -md5: c3655f82dcea2aa179b291e7099c1fcc -depends: -- libzlib >=1.3.1,<2.0a0 -license: BSD-3-Clause -license_family: BSD -size: 614429 -timestamp: 1764777145593 diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf deleted file mode 100644 index 10851264..00000000 --- a/modules/nf-core/fastqc/main.nf +++ /dev/null @@ -1,57 +0,0 @@ -process FASTQC { - tag "${meta.id}" - label 'process_low' - - conda "${moduleDir}/environment.yml" - container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container - ? 
'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' - : 'quay.io/biocontainers/fastqc:0.12.1--hdfd78af_0'}" - - input: - tuple val(meta), path(reads, stageAs: '?/*') - - output: - tuple val(meta), path("*.html"), emit: html - tuple val(meta), path("*.zip"), emit: zip - tuple val("${task.process}"), val('fastqc'), eval('fastqc --version | sed "/FastQC v/!d; s/.*v//"'), emit: versions_fastqc, topic: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - // Make list of old name and new name pairs to use for renaming in the bash while loop - def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[reads, "${prefix}.${reads.extension}"]] : reads.withIndex().collect { entry, index -> [entry, "${prefix}_${index + 1}.${entry.extension}"] } - def rename_to = old_new_pairs*.join(' ').join(' ') - def renamed_files = old_new_pairs.collect { _old_name, new_name -> new_name }.join(' ') - - // The total amount of allocated RAM by FastQC is equal to the number of threads defined (--threads) time the amount of RAM defined (--memory) - // https://github.com/s-andrews/FastQC/blob/1faeea0412093224d7f6a07f777fad60a5650795/fastqc#L211-L222 - // Dividing the task.memory by task.cpus allows to stick to requested amount of RAM in the label - def memory_in_mb = task.memory - ? (task.memory.toUnit('MB') / task.cpus).intValue() - : null - // FastQC memory value allowed range (100 - 10000) - def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 100 : memory_in_mb) - def fastqc_memory_arg = fastqc_memory ? 
"--memory ${fastqc_memory}" : '' - - """ - printf "%s %s\\n" ${rename_to} | while read old_name new_name; do - [ -f "\${new_name}" ] || ln -s \$old_name \$new_name - done - - fastqc \\ - ${args} \\ - --threads ${task.cpus} \\ - ${fastqc_memory_arg} \\ - ${renamed_files} - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.html - touch ${prefix}.zip - """ -} diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml deleted file mode 100644 index 2f6cfef6..00000000 --- a/modules/nf-core/fastqc/meta.yml +++ /dev/null @@ -1,111 +0,0 @@ -name: fastqc -description: Run FastQC on sequenced reads -keywords: - - quality control - - qc - - adapters - - fastq -tools: - - fastqc: - description: | - FastQC gives general quality metrics about your reads. - It provides information about the quality score distribution - across your reads, the per base sequence content (%A/C/G/T). - - You get information about adapter contamination and other - overrepresented sequences. - homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ - documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ - licence: ["GPL-2.0-only"] - identifier: biotools:fastqc -input: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. - ontologies: [] -output: - html: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - "*.html": - type: file - description: FastQC report - pattern: "*_{fastqc.html}" - ontologies: [] - zip: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - "*.zip": - type: file - description: FastQC report archive - pattern: "*_{fastqc.zip}" - ontologies: [] - versions_fastqc: - - - ${task.process}: - type: string - description: The process the versions were collected from - - fastqc: - type: string - description: The tool name - - fastqc --version | sed "/FastQC v/!d; s/.*v//": - type: eval - description: The expression to obtain the version of the tool - -topics: - versions: - - - ${task.process}: - type: string - description: The process the versions were collected from - - fastqc: - type: string - description: The tool name - - fastqc --version | sed "/FastQC v/!d; s/.*v//": - type: eval - description: The expression to obtain the version of the tool -authors: - - "@drpatelh" - - "@grst" - - "@ewels" - - "@FelixKrueger" -maintainers: - - "@drpatelh" - - "@grst" - - "@ewels" - - "@FelixKrueger" -containers: - docker: - linux/arm64: - name: community.wave.seqera.io/library/fastqc:0.12.1--e455e32f745abe68 - build_id: bd-e455e32f745abe68_1 - scan_id: sc-f102f736465af88c_1 - linux/amd64: - name: community.wave.seqera.io/library/fastqc:0.12.1--5cb1a2fa2f18c7c2 - build_id: bd-5cb1a2fa2f18c7c2_1 - scan_id: sc-0c0466326b6b77d2_1 - singularity: - linux/amd64: - name: oras://community.wave.seqera.io/library/fastqc:0.12.1--5c4bd442468d75dd - build_id: bd-5c4bd442468d75dd_1 - https: https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f2/f20b021476d1d87658820f971ebecc1e8cdbde0f338eb0d9cea2b0a8fc54a54b/data - linux/arm64: - name: oras://community.wave.seqera.io/library/fastqc:0.12.1--127a87fc06499035 - build_id: bd-127a87fc06499035_1 - https: https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46daf2dad0169afd2ae047c3e50ed3776259f664bf07e5e06b045dc23449e994/data - conda: - linux/amd64: - lock_file: modules/nf-core/fastqc/.conda-lock/linux_amd64-bd-5cb1a2fa2f18c7c2_1.txt - linux/arm64: - lock_file: 
modules/nf-core/fastqc/.conda-lock/linux_arm64-bd-e455e32f745abe68_1.txt diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test deleted file mode 100644 index 66c44da9..00000000 --- a/modules/nf-core/fastqc/tests/main.nf.test +++ /dev/null @@ -1,309 +0,0 @@ -nextflow_process { - - name "Test Process FASTQC" - script "../main.nf" - process "FASTQC" - - tag "modules" - tag "modules_nfcore" - tag "fastqc" - - test("sarscov2 single-end [fastq]") { - - when { - process { - """ - input[0] = Channel.of([ - [ id: 'test', single_end:true ], - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. - // looks like this:
Mon 2 Oct 2023
test.gz
- // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 - { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, - { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, - { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, - { assert snapshot(sanitizeOutput(process.out).findAll { key, val -> key != 'html' && key != 'zip' }).match() } - ) - } - } - - test("sarscov2 paired-end [fastq]") { - - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, - { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, - { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, - { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, - { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, - { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, - { assert snapshot(sanitizeOutput(process.out).findAll { key, val -> key != 'html' && key != 'zip' }).match() } - ) - } - } - - test("sarscov2 interleaved [fastq]") { - - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, - { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, - { assert path(process.out.html[0][1]).text.contains("File 
typeConventional base calls") }, - { assert snapshot(sanitizeOutput(process.out).findAll { key, val -> key != 'html' && key != 'zip' }).match() } - ) - } - } - - test("sarscov2 paired-end [bam]") { - - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, - { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, - { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, - { assert snapshot(sanitizeOutput(process.out).findAll { key, val -> key != 'html' && key != 'zip' }).match() } - ) - } - } - - test("sarscov2 multiple [fastq]") { - - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, - { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, - { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, - { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, - { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, - { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, - { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, 
- { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, - { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, - { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, - { assert path(process.out.html[0][1][2]).text.contains("File typeConventional base calls") }, - { assert path(process.out.html[0][1][3]).text.contains("File typeConventional base calls") }, - { assert snapshot(sanitizeOutput(process.out).findAll { key, val -> key != 'html' && key != 'zip' }).match() } - ) - } - } - - test("sarscov2 custom_prefix") { - - when { - process { - """ - input[0] = Channel.of([ - [ id:'mysample', single_end:true ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, - { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, - { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, - { assert snapshot(sanitizeOutput(process.out).findAll { key, val -> key != 'html' && key != 'zip' }).match() } - ) - } - } - - test("sarscov2 single-end [fastq] - stub") { - - options "-stub" - when { - process { - """ - input[0] = Channel.of([ - [ id: 'test', single_end:true ], - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("sarscov2 paired-end [fastq] - stub") { - - options "-stub" - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("sarscov2 interleaved [fastq] - stub") { - - options "-stub" - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("sarscov2 paired-end [bam] - stub") { - - options "-stub" - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("sarscov2 multiple [fastq] - stub") { - - options "-stub" - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("sarscov2 custom_prefix - stub") { - - options "-stub" - when { - process { - """ - input[0] = Channel.of([ - [ id:'mysample', single_end:true ], // meta map - 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } -} diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap deleted file mode 100644 index c8ee120f..00000000 --- a/modules/nf-core/fastqc/tests/main.nf.test.snap +++ /dev/null @@ -1,476 +0,0 @@ -{ - "sarscov2 custom_prefix": { - "content": [ - { - "versions_fastqc": [ - [ - "FASTQC", - "fastqc", - "0.12.1" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.0" - }, - "timestamp": "2025-10-28T16:39:14.518503" - }, - "sarscov2 single-end [fastq] - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": true - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": true - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - [ - "FASTQC", - "fastqc", - "0.12.1" - ] - ], - "html": [ - [ - { - "id": "test", - "single_end": true - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions_fastqc": [ - [ - "FASTQC", - "fastqc", - "0.12.1" - ] - ], - "zip": [ - [ - { - "id": "test", - "single_end": true - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.0" - }, - "timestamp": "2025-10-28T16:39:19.309008" - }, - "sarscov2 custom_prefix - stub": { - "content": [ - { - "0": [ - [ - { - "id": "mysample", - "single_end": true - }, - "mysample.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "mysample", - "single_end": true - }, - "mysample.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - [ - "FASTQC", - "fastqc", - "0.12.1" - ] - ], - "html": [ - [ - { - "id": "mysample", - "single_end": true - }, - 
"mysample.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions_fastqc": [ - [ - "FASTQC", - "fastqc", - "0.12.1" - ] - ], - "zip": [ - [ - { - "id": "mysample", - "single_end": true - }, - "mysample.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.0" - }, - "timestamp": "2025-10-28T16:39:44.94888" - }, - "sarscov2 interleaved [fastq]": { - "content": [ - { - "versions_fastqc": [ - [ - "FASTQC", - "fastqc", - "0.12.1" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.0" - }, - "timestamp": "2025-10-28T16:38:45.168496" - }, - "sarscov2 paired-end [bam]": { - "content": [ - { - "versions_fastqc": [ - [ - "FASTQC", - "fastqc", - "0.12.1" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.0" - }, - "timestamp": "2025-10-28T16:38:53.268919" - }, - "sarscov2 multiple [fastq]": { - "content": [ - { - "versions_fastqc": [ - [ - "FASTQC", - "fastqc", - "0.12.1" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.0" - }, - "timestamp": "2025-10-28T16:39:05.050305" - }, - "sarscov2 paired-end [fastq]": { - "content": [ - { - "versions_fastqc": [ - [ - "FASTQC", - "fastqc", - "0.12.1" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.0" - }, - "timestamp": "2025-10-28T16:38:37.2373" - }, - "sarscov2 paired-end [fastq] - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - [ - "FASTQC", - "fastqc", - "0.12.1" - ] - ], - "html": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions_fastqc": [ - [ - "FASTQC", - "fastqc", - "0.12.1" - ] - ], - "zip": [ - [ - { - "id": "test", - "single_end": false - }, - 
"test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.0" - }, - "timestamp": "2025-10-28T16:39:24.450398" - }, - "sarscov2 multiple [fastq] - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - [ - "FASTQC", - "fastqc", - "0.12.1" - ] - ], - "html": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions_fastqc": [ - [ - "FASTQC", - "fastqc", - "0.12.1" - ] - ], - "zip": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.0" - }, - "timestamp": "2025-10-28T16:39:39.758762" - }, - "sarscov2 single-end [fastq]": { - "content": [ - { - "versions_fastqc": [ - [ - "FASTQC", - "fastqc", - "0.12.1" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.0" - }, - "timestamp": "2025-10-28T16:38:29.555068" - }, - "sarscov2 interleaved [fastq] - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - [ - "FASTQC", - "fastqc", - "0.12.1" - ] - ], - "html": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions_fastqc": [ - [ - "FASTQC", - "fastqc", - "0.12.1" - ] - ], - "zip": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.0" - }, - "timestamp": "2025-10-28T16:39:29.193136" 
- }, - "sarscov2 paired-end [bam] - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - [ - "FASTQC", - "fastqc", - "0.12.1" - ] - ], - "html": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions_fastqc": [ - [ - "FASTQC", - "fastqc", - "0.12.1" - ] - ], - "zip": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.0" - }, - "timestamp": "2025-10-28T16:39:34.144919" - } -} \ No newline at end of file diff --git a/modules/nf-core/multiqc/.conda-lock/linux_amd64-bd-c1f4a7982b743963_1.txt b/modules/nf-core/multiqc/.conda-lock/linux_amd64-bd-c1f4a7982b743963_1.txt deleted file mode 100644 index 76190304..00000000 --- a/modules/nf-core/multiqc/.conda-lock/linux_amd64-bd-c1f4a7982b743963_1.txt +++ /dev/null @@ -1,1552 +0,0 @@ - -version: 6 -environments: -default: -channels: -- url: https://conda.anaconda.org/conda-forge/ -- url: https://conda.anaconda.org/bioconda/ -- url: https://conda.anaconda.org/bioconda/ -options: -pypi-prerelease-mode: if-necessary-or-explicit -packages: -linux-64: -- conda: https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-20_gnu.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/_python_abi3_support-1.0-hd8ed1ab_2.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/annotated-types-0.7.0-pyhd8ed1ab_1.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/attrs-26.1.0-pyhcf101f3_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/backports.zstd-1.3.0-py314h680f03e_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/brotli-python-1.2.0-py314h3de4e8d_1.conda -- conda: 
https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hda65f42_9.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2026.2.25-hbd8a1cb_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/certifi-2026.2.25-pyhd8ed1ab_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.4.6-pyhd8ed1ab_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/click-8.3.1-pyh8f84b5b_1.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/coloredlogs-15.0.1-pyhd8ed1ab_4.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/colormath-3.0.0-pyhd8ed1ab_4.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/cpython-3.14.3-py314hd8ed1ab_101.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/expat-2.7.4-hecca717_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2 -- conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2 -- conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2 -- conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_3.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.17.1-h27c8c51_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-hc364b38_1.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/h2-4.3.0-pyhcf101f3_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/hpack-4.1.0-pyhd8ed1ab_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/humanfriendly-10.0-pyh707e725_8.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/humanize-4.15.0-pyhd8ed1ab_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/hyperframe-6.1.0-pyhd8ed1ab_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/icu-78.3-h33c6efd_0.conda -- conda: 
https://conda.anaconda.org/conda-forge/noarch/idna-3.11-pyhd8ed1ab_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-8.8.0-pyhcf101f3_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.6-pyhcf101f3_1.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/jsonschema-4.26.0-pyhcf101f3_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/jsonschema-specifications-2025.9.1-pyhcf101f3_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/kaleido-core-0.2.1-h3644ca4_0.tar.bz2 -- conda: https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.18-h0c24ade_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.45.1-default_hbd61a6d_102.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/lerc-4.1.0-hdb68285_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libblas-3.11.0-5_h4a7cf45_openblas.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.11.0-5_h0358290_openblas.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.25-h17f619e_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.4-hecca717_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libffi-3.5.2-h3435931_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libfreetype-2.14.3-ha770c72_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libfreetype6-2.14.3-h73754d4_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-15.2.0-he0feb66_18.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-15.2.0-h69a702a_18.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libgfortran-15.2.0-h69a702a_18.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-15.2.0-h68bc16d_18.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libgomp-15.2.0-he0feb66_18.conda -- conda: 
https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.1.2-hb03c661_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.11.0-5_h47877c9_openblas.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.2-hb03c661_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libmpdec-4.0.0-hb03c661_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.30-pthreads_h94d23a6_4.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.55-h421ea60_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.52.0-hf4e2dac_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-15.2.0-h934c35e_18.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.7.1-h9d88235_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.41.3-h5347b49_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.6.0-hd42ef1d_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.17.0-h8a09558_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.2-h25fd6f3_2.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/markdown-3.10.2-pyhcf101f3_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/markdown-it-py-4.0.0-pyhd8ed1ab_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/markupsafe-3.0.3-py314h67df5f8_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/mathjax-2.7.7-ha770c72_3.tar.bz2 -- conda: https://conda.anaconda.org/conda-forge/noarch/mdurl-0.1.2-pyhd8ed1ab_1.conda -- conda: https://conda.anaconda.org/bioconda/noarch/multiqc-1.33-pyhdfd78af_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/narwhals-2.18.1-pyhcf101f3_1.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/natsort-8.4.0-pyhcf101f3_2.conda -- conda: 
https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h2d0b736_3.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/networkx-3.6.1-pyhcf101f3_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/nspr-4.38-h29cc59b_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/nss-3.118-h445c969_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/numpy-2.4.3-py314h2b28147_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.4-h55fea9a_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/openssl-3.6.1-h35e630c_1.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/packaging-26.0-pyhcf101f3_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/pillow-12.1.1-py314h8ec4b1a_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/plotly-6.6.0-pyhd8ed1ab_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/polars-1.39.3-pyh58ad624_1.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/polars-lts-cpu-1.34.0.deprecated-hc364b38_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/polars-runtime-32-1.39.3-py310hffdcd12_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/polars-runtime-compat-1.39.3-py310hbcd5346_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/procps-ng-4.0.6-h18c060e_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-hb9d3cd8_1002.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/pyaml-env-1.2.2-pyhd8ed1ab_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/pydantic-2.12.5-pyhcf101f3_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/pydantic-core-2.41.5-py314h2e6c369_1.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/pygments-2.19.2-pyhd8ed1ab_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha55dd90_7.conda -- conda: 
https://conda.anaconda.org/conda-forge/linux-64/python-3.14.3-h32b2ec7_101_cp314.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/python-dotenv-1.2.2-pyhcf101f3_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/python-gil-3.14.3-h4df99d1_101.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/python-kaleido-0.2.1-pyhd8ed1ab_0.tar.bz2 -- conda: https://conda.anaconda.org/conda-forge/noarch/python_abi-3.14-8_cp314.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/pyyaml-6.0.3-py314h67df5f8_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/readline-8.3-h853b02a_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/referencing-0.37.0-pyhcf101f3_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/regex-2026.2.28-py314h5bd0f2a_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/requests-2.32.5-pyhcf101f3_1.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/rich-14.3.3-pyhcf101f3_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/rich-click-1.9.7-pyh8f84b5b_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/rpds-py-0.30.0-py314h2e6c369_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/spectra-0.0.11-pyhd8ed1ab_2.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/sqlite-3.52.0-h04a0ce9_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/tiktoken-0.12.0-py314h67fec18_3.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h366c992_103.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/tqdm-4.67.3-pyh8f84b5b_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/typeguard-4.5.1-pyhd8ed1ab_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.15.0-h396c80c_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/typing-inspection-0.4.2-pyhd8ed1ab_1.conda -- conda: 
https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.15.0-pyhcf101f3_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2025c-hc9c84f9_1.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/urllib3-2.6.3-pyhd8ed1ab_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.12-hb03c661_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.5-hb03c661_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/yaml-0.2.5-h280c20c_3.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/zipp-3.23.0-pyhcf101f3_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/zlib-ng-2.3.3-hceb46e0_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb78ec9c_6.conda -packages: -- conda: https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-20_gnu.conda -build_number: 20 -sha256: 1dd3fffd892081df9726d7eb7e0dea6198962ba775bd88842135a4ddb4deb3c9 -md5: a9f577daf3de00bca7c3c76c0ecbd1de -depends: -- __glibc >=2.17,<3.0.a0 -- libgomp >=7.5.0 -constrains: -- openmp_impl <0.0a0 -license: BSD-3-Clause -license_family: BSD -size: 28948 -timestamp: 1770939786096 -- conda: https://conda.anaconda.org/conda-forge/noarch/_python_abi3_support-1.0-hd8ed1ab_2.conda -sha256: a3967b937b9abf0f2a99f3173fa4630293979bd1644709d89580e7c62a544661 -md5: aaa2a381ccc56eac91d63b6c1240312f -depends: -- cpython -- python-gil -license: MIT -license_family: MIT -size: 8191 -timestamp: 1744137672556 -- conda: https://conda.anaconda.org/conda-forge/noarch/annotated-types-0.7.0-pyhd8ed1ab_1.conda -sha256: e0ea1ba78fbb64f17062601edda82097fcf815012cf52bb704150a2668110d48 -md5: 2934f256a8acfe48f6ebb4fce6cde29c -depends: -- python >=3.9 -- typing-extensions >=4.0.0 -license: MIT -license_family: MIT -size: 18074 -timestamp: 1733247158254 -- conda: https://conda.anaconda.org/conda-forge/noarch/attrs-26.1.0-pyhcf101f3_0.conda -sha256: 
1b6124230bb4e571b1b9401537ecff575b7b109cc3a21ee019f65e083b8399ab -md5: c6b0543676ecb1fb2d7643941fe375f2 -depends: -- python >=3.10 -- python -license: MIT -license_family: MIT -size: 64927 -timestamp: 1773935801332 -- conda: https://conda.anaconda.org/conda-forge/noarch/backports.zstd-1.3.0-py314h680f03e_0.conda -noarch: generic -sha256: c31ab719d256bc6f89926131e88ecd0f0c5d003fe8481852c6424f4ec6c7eb29 -md5: a2ac7763a9ac75055b68f325d3255265 -depends: -- python >=3.14 -license: BSD-3-Clause AND MIT AND EPL-2.0 -size: 7514 -timestamp: 1767044983590 -- conda: https://conda.anaconda.org/conda-forge/linux-64/brotli-python-1.2.0-py314h3de4e8d_1.conda -sha256: 3ad3500bff54a781c29f16ce1b288b36606e2189d0b0ef2f67036554f47f12b0 -md5: 8910d2c46f7e7b519129f486e0fe927a -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -- libstdcxx >=14 -- python >=3.14,<3.15.0a0 -- python_abi 3.14.* *_cp314 -constrains: -- libbrotlicommon 1.2.0 hb03c661_1 -license: MIT -license_family: MIT -size: 367376 -timestamp: 1764017265553 -- conda: https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hda65f42_9.conda -sha256: 0b75d45f0bba3e95dc693336fa51f40ea28c980131fec438afb7ce6118ed05f6 -md5: d2ffd7602c02f2b316fd921d39876885 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -license: bzip2-1.0.6 -license_family: BSD -size: 260182 -timestamp: 1771350215188 -- conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2026.2.25-hbd8a1cb_0.conda -sha256: 67cc7101b36421c5913a1687ef1b99f85b5d6868da3abbf6ec1a4181e79782fc -md5: 4492fd26db29495f0ba23f146cd5638d -depends: -- __unix -license: ISC -size: 147413 -timestamp: 1772006283803 -- conda: https://conda.anaconda.org/conda-forge/noarch/certifi-2026.2.25-pyhd8ed1ab_0.conda -sha256: a6b118fd1ed6099dc4fc03f9c492b88882a780fadaef4ed4f93dc70757713656 -md5: 765c4d97e877cdbbb88ff33152b86125 -depends: -- python >=3.10 -license: ISC -size: 151445 -timestamp: 1772001170301 -- conda: 
https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.4.6-pyhd8ed1ab_0.conda -sha256: d86dfd428b2e3c364fa90e07437c8405d635aa4ef54b25ab51d9c712be4112a5 -md5: 49ee13eb9b8f44d63879c69b8a40a74b -depends: -- python >=3.10 -license: MIT -license_family: MIT -size: 58510 -timestamp: 1773660086450 -- conda: https://conda.anaconda.org/conda-forge/noarch/click-8.3.1-pyh8f84b5b_1.conda -sha256: 38cfe1ee75b21a8361c8824f5544c3866f303af1762693a178266d7f198e8715 -md5: ea8a6c3256897cc31263de9f455e25d9 -depends: -- python >=3.10 -- __unix -- python -license: BSD-3-Clause -license_family: BSD -size: 97676 -timestamp: 1764518652276 -- conda: https://conda.anaconda.org/conda-forge/noarch/coloredlogs-15.0.1-pyhd8ed1ab_4.conda -sha256: 8021c76eeadbdd5784b881b165242db9449783e12ce26d6234060026fd6a8680 -md5: b866ff7007b934d564961066c8195983 -depends: -- humanfriendly >=9.1 -- python >=3.9 -license: MIT -license_family: MIT -size: 43758 -timestamp: 1733928076798 -- conda: https://conda.anaconda.org/conda-forge/noarch/colormath-3.0.0-pyhd8ed1ab_4.conda -sha256: 59c9e29800b483b390467f90e82b0da3a4fbf0612efe1c90813fca232780e160 -md5: 071cf7b0ce333c81718b054066c15102 -depends: -- networkx >=2.0 -- numpy -- python >=3.9 -license: BSD-3-Clause -license_family: BSD -size: 39326 -timestamp: 1735759976140 -- conda: https://conda.anaconda.org/conda-forge/noarch/cpython-3.14.3-py314hd8ed1ab_101.conda -noarch: generic -sha256: 91b06300879df746214f7363d6c27c2489c80732e46a369eb2afc234bcafb44c -md5: 3bb89e4f795e5414addaa531d6b1500a -depends: -- python >=3.14,<3.15.0a0 -- python_abi * *_cp314 -license: Python-2.0 -size: 50078 -timestamp: 1770674447292 -- conda: https://conda.anaconda.org/conda-forge/linux-64/expat-2.7.4-hecca717_0.conda -sha256: 0cc345e4dead417996ce9a1f088b28d858f03d113d43c1963d29194366dcce27 -md5: a0535741a4934b3e386051065c58761a -depends: -- __glibc >=2.17,<3.0.a0 -- libexpat 2.7.4 hecca717_0 -- libgcc >=14 -license: MIT -license_family: MIT -size: 145274 -timestamp: 
1771259434699 -- conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2 -sha256: 58d7f40d2940dd0a8aa28651239adbf5613254df0f75789919c4e6762054403b -md5: 0c96522c6bdaed4b1566d11387caaf45 -license: BSD-3-Clause -license_family: BSD -size: 397370 -timestamp: 1566932522327 -- conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2 -sha256: c52a29fdac682c20d252facc50f01e7c2e7ceac52aa9817aaf0bb83f7559ec5c -md5: 34893075a5c9e55cdafac56607368fc6 -license: OFL-1.1 -license_family: Other -size: 96530 -timestamp: 1620479909603 -- conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2 -sha256: 00925c8c055a2275614b4d983e1df637245e19058d79fc7dd1a93b8d9fb4b139 -md5: 4d59c254e01d9cde7957100457e2d5fb -license: OFL-1.1 -license_family: Other -size: 700814 -timestamp: 1620479612257 -- conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_3.conda -sha256: 2821ec1dc454bd8b9a31d0ed22a7ce22422c0aef163c59f49dfdf915d0f0ca14 -md5: 49023d73832ef61042f6a237cb2687e7 -license: LicenseRef-Ubuntu-Font-Licence-Version-1.0 -license_family: Other -size: 1620504 -timestamp: 1727511233259 -- conda: https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.17.1-h27c8c51_0.conda -sha256: aa4a44dba97151221100a637c7f4bde619567afade9c0265f8e1c8eed8d7bd8c -md5: 867127763fbe935bab59815b6e0b7b5c -depends: -- __glibc >=2.17,<3.0.a0 -- libexpat >=2.7.4,<3.0a0 -- libfreetype >=2.14.1 -- libfreetype6 >=2.14.1 -- libgcc >=14 -- libuuid >=2.41.3,<3.0a0 -- libzlib >=1.3.1,<2.0a0 -license: MIT -license_family: MIT -size: 270705 -timestamp: 1771382710863 -- conda: https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-hc364b38_1.conda -sha256: 54eea8469786bc2291cc40bca5f46438d3e062a399e8f53f013b6a9f50e98333 -md5: a7970cd949a077b7cb9696379d338681 -depends: -- font-ttf-ubuntu -- font-ttf-inconsolata -- font-ttf-dejavu-sans-mono -- 
font-ttf-source-code-pro -license: BSD-3-Clause -license_family: BSD -size: 4059 -timestamp: 1762351264405 -- conda: https://conda.anaconda.org/conda-forge/noarch/h2-4.3.0-pyhcf101f3_0.conda -sha256: 84c64443368f84b600bfecc529a1194a3b14c3656ee2e832d15a20e0329b6da3 -md5: 164fc43f0b53b6e3a7bc7dce5e4f1dc9 -depends: -- python >=3.10 -- hyperframe >=6.1,<7 -- hpack >=4.1,<5 -- python -license: MIT -license_family: MIT -size: 95967 -timestamp: 1756364871835 -- conda: https://conda.anaconda.org/conda-forge/noarch/hpack-4.1.0-pyhd8ed1ab_0.conda -sha256: 6ad78a180576c706aabeb5b4c8ceb97c0cb25f1e112d76495bff23e3779948ba -md5: 0a802cb9888dd14eeefc611f05c40b6e -depends: -- python >=3.9 -license: MIT -license_family: MIT -size: 30731 -timestamp: 1737618390337 -- conda: https://conda.anaconda.org/conda-forge/noarch/humanfriendly-10.0-pyh707e725_8.conda -sha256: fa2071da7fab758c669e78227e6094f6b3608228740808a6de5d6bce83d9e52d -md5: 7fe569c10905402ed47024fc481bb371 -depends: -- __unix -- python >=3.9 -license: MIT -license_family: MIT -size: 73563 -timestamp: 1733928021866 -- conda: https://conda.anaconda.org/conda-forge/noarch/humanize-4.15.0-pyhd8ed1ab_0.conda -sha256: 6c4343b376d0b12a4c75ab992640970d36c933cad1fd924f6a1181fa91710e80 -md5: daddf757c3ecd6067b9af1df1f25d89e -depends: -- python >=3.10 -license: MIT -license_family: MIT -size: 67994 -timestamp: 1766267728652 -- conda: https://conda.anaconda.org/conda-forge/noarch/hyperframe-6.1.0-pyhd8ed1ab_0.conda -sha256: 77af6f5fe8b62ca07d09ac60127a30d9069fdc3c68d6b256754d0ffb1f7779f8 -md5: 8e6923fc12f1fe8f8c4e5c9f343256ac -depends: -- python >=3.9 -license: MIT -license_family: MIT -size: 17397 -timestamp: 1737618427549 -- conda: https://conda.anaconda.org/conda-forge/linux-64/icu-78.3-h33c6efd_0.conda -sha256: fbf86c4a59c2ed05bbffb2ba25c7ed94f6185ec30ecb691615d42342baa1a16a -md5: c80d8a3b84358cb967fa81e7075fbc8a -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -- libstdcxx >=14 -license: MIT -license_family: MIT -size: 12723451 
-timestamp: 1773822285671 -- conda: https://conda.anaconda.org/conda-forge/noarch/idna-3.11-pyhd8ed1ab_0.conda -sha256: ae89d0299ada2a3162c2614a9d26557a92aa6a77120ce142f8e0109bbf0342b0 -md5: 53abe63df7e10a6ba605dc5f9f961d36 -depends: -- python >=3.10 -license: BSD-3-Clause -license_family: BSD -size: 50721 -timestamp: 1760286526795 -- conda: https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-8.8.0-pyhcf101f3_0.conda -sha256: 82ab2a0d91ca1e7e63ab6a4939356667ef683905dea631bc2121aa534d347b16 -md5: 080594bf4493e6bae2607e65390c520a -depends: -- python >=3.10 -- zipp >=3.20 -- python -license: Apache-2.0 -license_family: APACHE -size: 34387 -timestamp: 1773931568510 -- conda: https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.6-pyhcf101f3_1.conda -sha256: fc9ca7348a4f25fed2079f2153ecdcf5f9cf2a0bc36c4172420ca09e1849df7b -md5: 04558c96691bed63104678757beb4f8d -depends: -- markupsafe >=2.0 -- python >=3.10 -- python -license: BSD-3-Clause -license_family: BSD -size: 120685 -timestamp: 1764517220861 -- conda: https://conda.anaconda.org/conda-forge/noarch/jsonschema-4.26.0-pyhcf101f3_0.conda -sha256: db973a37d75db8e19b5f44bbbdaead0c68dde745407f281e2a7fe4db74ec51d7 -md5: ada41c863af263cc4c5fcbaff7c3e4dc -depends: -- attrs >=22.2.0 -- jsonschema-specifications >=2023.3.6 -- python >=3.10 -- referencing >=0.28.4 -- rpds-py >=0.25.0 -- python -license: MIT -license_family: MIT -size: 82356 -timestamp: 1767839954256 -- conda: https://conda.anaconda.org/conda-forge/noarch/jsonschema-specifications-2025.9.1-pyhcf101f3_0.conda -sha256: 0a4f3b132f0faca10c89fdf3b60e15abb62ded6fa80aebfc007d05965192aa04 -md5: 439cd0f567d697b20a8f45cb70a1005a -depends: -- python >=3.10 -- referencing >=0.31.0 -- python -license: MIT -license_family: MIT -size: 19236 -timestamp: 1757335715225 -- conda: https://conda.anaconda.org/conda-forge/linux-64/kaleido-core-0.2.1-h3644ca4_0.tar.bz2 -sha256: 7f243680ca03eba7457b7a48f93a9440ba8181a8eac20a3eb5ef165ab6c96664 -md5: 
b3723b235b0758abaae8c82ce4d80146 -depends: -- __glibc >=2.17,<3.0.a0 -- expat >=2.2.10,<3.0.0a0 -- fontconfig -- fonts-conda-forge -- libgcc-ng >=9.3.0 -- mathjax 2.7.* -- nspr >=4.29,<5.0a0 -- nss >=3.62,<4.0a0 -- sqlite >=3.34.0,<4.0a0 -license: MIT -license_family: MIT -size: 62099926 -timestamp: 1615199463039 -- conda: https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.18-h0c24ade_0.conda -sha256: 836ec4b895352110335b9fdcfa83a8dcdbe6c5fb7c06c4929130600caea91c0a -md5: 6f2e2c8f58160147c4d1c6f4c14cbac4 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -- libjpeg-turbo >=3.1.2,<4.0a0 -- libtiff >=4.7.1,<4.8.0a0 -license: MIT -license_family: MIT -size: 249959 -timestamp: 1768184673131 -- conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.45.1-default_hbd61a6d_102.conda -sha256: 3d584956604909ff5df353767f3a2a2f60e07d070b328d109f30ac40cd62df6c -md5: 18335a698559cdbcd86150a48bf54ba6 -depends: -- __glibc >=2.17,<3.0.a0 -- zstd >=1.5.7,<1.6.0a0 -constrains: -- binutils_impl_linux-64 2.45.1 -license: GPL-3.0-only -license_family: GPL -size: 728002 -timestamp: 1774197446916 -- conda: https://conda.anaconda.org/conda-forge/linux-64/lerc-4.1.0-hdb68285_0.conda -sha256: f84cb54782f7e9cea95e810ea8fef186e0652d0fa73d3009914fa2c1262594e1 -md5: a752488c68f2e7c456bcbd8f16eec275 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -- libstdcxx >=14 -license: Apache-2.0 -license_family: Apache -size: 261513 -timestamp: 1773113328888 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libblas-3.11.0-5_h4a7cf45_openblas.conda -build_number: 5 -sha256: 18c72545080b86739352482ba14ba2c4815e19e26a7417ca21a95b76ec8da24c -md5: c160954f7418d7b6e87eaf05a8913fa9 -depends: -- libopenblas >=0.3.30,<0.3.31.0a0 -- libopenblas >=0.3.30,<1.0a0 -constrains: -- mkl <2026 -- liblapack 3.11.0 5*_openblas -- libcblas 3.11.0 5*_openblas -- blas 2.305 openblas -- liblapacke 3.11.0 5*_openblas -license: BSD-3-Clause -license_family: BSD -size: 18213 -timestamp: 1765818813880 
-- conda: https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.11.0-5_h0358290_openblas.conda -build_number: 5 -sha256: 0cbdcc67901e02dc17f1d19e1f9170610bd828100dc207de4d5b6b8ad1ae7ad8 -md5: 6636a2b6f1a87572df2970d3ebc87cc0 -depends: -- libblas 3.11.0 5_h4a7cf45_openblas -constrains: -- liblapacke 3.11.0 5*_openblas -- blas 2.305 openblas -- liblapack 3.11.0 5*_openblas -license: BSD-3-Clause -license_family: BSD -size: 18194 -timestamp: 1765818837135 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.25-h17f619e_0.conda -sha256: aa8e8c4be9a2e81610ddf574e05b64ee131fab5e0e3693210c9d6d2fba32c680 -md5: 6c77a605a7a689d17d4819c0f8ac9a00 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -license: MIT -license_family: MIT -size: 73490 -timestamp: 1761979956660 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.4-hecca717_0.conda -sha256: d78f1d3bea8c031d2f032b760f36676d87929b18146351c4464c66b0869df3f5 -md5: e7f7ce06ec24cfcfb9e36d28cf82ba57 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -constrains: -- expat 2.7.4.* -license: MIT -license_family: MIT -size: 76798 -timestamp: 1771259418166 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libffi-3.5.2-h3435931_0.conda -sha256: 31f19b6a88ce40ebc0d5a992c131f57d919f73c0b92cd1617a5bec83f6e961e6 -md5: a360c33a5abe61c07959e449fa1453eb -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -license: MIT -license_family: MIT -size: 58592 -timestamp: 1769456073053 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libfreetype-2.14.3-ha770c72_0.conda -sha256: 38f014a7129e644636e46064ecd6b1945e729c2140e21d75bb476af39e692db2 -md5: e289f3d17880e44b633ba911d57a321b -depends: -- libfreetype6 >=2.14.3 -license: GPL-2.0-only OR FTL -size: 8049 -timestamp: 1774298163029 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libfreetype6-2.14.3-h73754d4_0.conda -sha256: 16f020f96da79db1863fcdd8f2b8f4f7d52f177dd4c58601e38e9182e91adf1d -md5: fb16b4b69e3f1dcfe79d80db8fd0c55d 
-depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -- libpng >=1.6.55,<1.7.0a0 -- libzlib >=1.3.2,<2.0a0 -constrains: -- freetype >=2.14.3 -license: GPL-2.0-only OR FTL -size: 384575 -timestamp: 1774298162622 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-15.2.0-he0feb66_18.conda -sha256: faf7d2017b4d718951e3a59d081eb09759152f93038479b768e3d612688f83f5 -md5: 0aa00f03f9e39fb9876085dee11a85d4 -depends: -- __glibc >=2.17,<3.0.a0 -- _openmp_mutex >=4.5 -constrains: -- libgcc-ng ==15.2.0=*_18 -- libgomp 15.2.0 he0feb66_18 -license: GPL-3.0-only WITH GCC-exception-3.1 -license_family: GPL -size: 1041788 -timestamp: 1771378212382 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-15.2.0-h69a702a_18.conda -sha256: e318a711400f536c81123e753d4c797a821021fb38970cebfb3f454126016893 -md5: d5e96b1ed75ca01906b3d2469b4ce493 -depends: -- libgcc 15.2.0 he0feb66_18 -license: GPL-3.0-only WITH GCC-exception-3.1 -license_family: GPL -size: 27526 -timestamp: 1771378224552 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libgfortran-15.2.0-h69a702a_18.conda -sha256: d2c9fad338fd85e4487424865da8e74006ab2e2475bd788f624d7a39b2a72aee -md5: 9063115da5bc35fdc3e1002e69b9ef6e -depends: -- libgfortran5 15.2.0 h68bc16d_18 -constrains: -- libgfortran-ng ==15.2.0=*_18 -license: GPL-3.0-only WITH GCC-exception-3.1 -license_family: GPL -size: 27523 -timestamp: 1771378269450 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-15.2.0-h68bc16d_18.conda -sha256: 539b57cf50ec85509a94ba9949b7e30717839e4d694bc94f30d41c9d34de2d12 -md5: 646855f357199a12f02a87382d429b75 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=15.2.0 -constrains: -- libgfortran 15.2.0 -license: GPL-3.0-only WITH GCC-exception-3.1 -license_family: GPL -size: 2482475 -timestamp: 1771378241063 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libgomp-15.2.0-he0feb66_18.conda -sha256: 21337ab58e5e0649d869ab168d4e609b033509de22521de1bfed0c031bfc5110 -md5: 
239c5e9546c38a1e884d69effcf4c882 -depends: -- __glibc >=2.17,<3.0.a0 -license: GPL-3.0-only WITH GCC-exception-3.1 -license_family: GPL -size: 603262 -timestamp: 1771378117851 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.1.2-hb03c661_0.conda -sha256: cc9aba923eea0af8e30e0f94f2ad7156e2984d80d1e8e7fe6be5a1f257f0eb32 -md5: 8397539e3a0bbd1695584fb4f927485a -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -constrains: -- jpeg <0.0.0a -license: IJG AND BSD-3-Clause AND Zlib -size: 633710 -timestamp: 1762094827865 -- conda: https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.11.0-5_h47877c9_openblas.conda -build_number: 5 -sha256: c723b6599fcd4c6c75dee728359ef418307280fa3e2ee376e14e85e5bbdda053 -md5: b38076eb5c8e40d0106beda6f95d7609 -depends: -- libblas 3.11.0 5_h4a7cf45_openblas -constrains: -- blas 2.305 openblas -- liblapacke 3.11.0 5*_openblas -- libcblas 3.11.0 5*_openblas -license: BSD-3-Clause -license_family: BSD -size: 18200 -timestamp: 1765818857876 -- conda: https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.2-hb03c661_0.conda -sha256: 755c55ebab181d678c12e49cced893598f2bab22d582fbbf4d8b83c18be207eb -md5: c7c83eecbb72d88b940c249af56c8b17 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -constrains: -- xz 5.8.2.* -license: 0BSD -size: 113207 -timestamp: 1768752626120 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libmpdec-4.0.0-hb03c661_1.conda -sha256: fe171ed5cf5959993d43ff72de7596e8ac2853e9021dec0344e583734f1e0843 -md5: 2c21e66f50753a083cbe6b80f38268fa -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -license: BSD-2-Clause -license_family: BSD -size: 92400 -timestamp: 1769482286018 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.30-pthreads_h94d23a6_4.conda -sha256: 199d79c237afb0d4780ccd2fbf829cea80743df60df4705202558675e07dd2c5 -md5: be43915efc66345cccb3c310b6ed0374 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -- libgfortran -- libgfortran5 >=14.3.0 -constrains: 
-- openblas >=0.3.30,<0.3.31.0a0 -license: BSD-3-Clause -license_family: BSD -size: 5927939 -timestamp: 1763114673331 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.55-h421ea60_0.conda -sha256: 36ade759122cdf0f16e2a2562a19746d96cf9c863ffaa812f2f5071ebbe9c03c -md5: 5f13ffc7d30ffec87864e678df9957b4 -depends: -- libgcc >=14 -- __glibc >=2.17,<3.0.a0 -- libzlib >=1.3.1,<2.0a0 -license: zlib-acknowledgement -size: 317669 -timestamp: 1770691470744 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.52.0-hf4e2dac_0.conda -sha256: d716847b7deca293d2e49ed1c8ab9e4b9e04b9d780aea49a97c26925b28a7993 -md5: fd893f6a3002a635b5e50ceb9dd2c0f4 -depends: -- __glibc >=2.17,<3.0.a0 -- icu >=78.2,<79.0a0 -- libgcc >=14 -- libzlib >=1.3.1,<2.0a0 -license: blessing -size: 951405 -timestamp: 1772818874251 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-15.2.0-h934c35e_18.conda -sha256: 78668020064fdaa27e9ab65cd2997e2c837b564ab26ce3bf0e58a2ce1a525c6e -md5: 1b08cd684f34175e4514474793d44bcb -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc 15.2.0 he0feb66_18 -constrains: -- libstdcxx-ng ==15.2.0=*_18 -license: GPL-3.0-only WITH GCC-exception-3.1 -license_family: GPL -size: 5852330 -timestamp: 1771378262446 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.7.1-h9d88235_1.conda -sha256: e5f8c38625aa6d567809733ae04bb71c161a42e44a9fa8227abe61fa5c60ebe0 -md5: cd5a90476766d53e901500df9215e927 -depends: -- __glibc >=2.17,<3.0.a0 -- lerc >=4.0.0,<5.0a0 -- libdeflate >=1.25,<1.26.0a0 -- libgcc >=14 -- libjpeg-turbo >=3.1.0,<4.0a0 -- liblzma >=5.8.1,<6.0a0 -- libstdcxx >=14 -- libwebp-base >=1.6.0,<2.0a0 -- libzlib >=1.3.1,<2.0a0 -- zstd >=1.5.7,<1.6.0a0 -license: HPND -size: 435273 -timestamp: 1762022005702 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.41.3-h5347b49_0.conda -sha256: 1a7539cfa7df00714e8943e18de0b06cceef6778e420a5ee3a2a145773758aee -md5: db409b7c1720428638e7c0d509d3e1b5 -depends: -- __glibc 
>=2.17,<3.0.a0 -- libgcc >=14 -license: BSD-3-Clause -license_family: BSD -size: 40311 -timestamp: 1766271528534 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.6.0-hd42ef1d_0.conda -sha256: 3aed21ab28eddffdaf7f804f49be7a7d701e8f0e46c856d801270b470820a37b -md5: aea31d2e5b1091feca96fcfe945c3cf9 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -constrains: -- libwebp 1.6.0 -license: BSD-3-Clause -license_family: BSD -size: 429011 -timestamp: 1752159441324 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.17.0-h8a09558_0.conda -sha256: 666c0c431b23c6cec6e492840b176dde533d48b7e6fb8883f5071223433776aa -md5: 92ed62436b625154323d40d5f2f11dd7 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=13 -- pthread-stubs -- xorg-libxau >=1.0.11,<2.0a0 -- xorg-libxdmcp -license: MIT -license_family: MIT -size: 395888 -timestamp: 1727278577118 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.2-h25fd6f3_2.conda -sha256: 55044c403570f0dc26e6364de4dc5368e5f3fc7ff103e867c487e2b5ab2bcda9 -md5: d87ff7921124eccd67248aa483c23fec -depends: -- __glibc >=2.17,<3.0.a0 -constrains: -- zlib 1.3.2 *_2 -license: Zlib -license_family: Other -size: 63629 -timestamp: 1774072609062 -- conda: https://conda.anaconda.org/conda-forge/noarch/markdown-3.10.2-pyhcf101f3_0.conda -sha256: 20e0892592a3e7c683e3d66df704a9425d731486a97c34fc56af4da1106b2b6b -md5: ba0a9221ce1063f31692c07370d062f3 -depends: -- importlib-metadata >=4.4 -- python >=3.10 -- python -license: BSD-3-Clause -license_family: BSD -size: 85893 -timestamp: 1770694658918 -- conda: https://conda.anaconda.org/conda-forge/noarch/markdown-it-py-4.0.0-pyhd8ed1ab_0.conda -sha256: 7b1da4b5c40385791dbc3cc85ceea9fad5da680a27d5d3cb8bfaa185e304a89e -md5: 5b5203189eb668f042ac2b0826244964 -depends: -- mdurl >=0.1,<1 -- python >=3.10 -license: MIT -license_family: MIT -size: 64736 -timestamp: 1754951288511 -- conda: 
https://conda.anaconda.org/conda-forge/linux-64/markupsafe-3.0.3-py314h67df5f8_1.conda -sha256: c279be85b59a62d5c52f5dd9a4cd43ebd08933809a8416c22c3131595607d4cf -md5: 9a17c4307d23318476d7fbf0fedc0cde -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -- python >=3.14,<3.15.0a0 -- python_abi 3.14.* *_cp314 -constrains: -- jinja2 >=3.0.0 -license: BSD-3-Clause -license_family: BSD -size: 27424 -timestamp: 1772445227915 -- conda: https://conda.anaconda.org/conda-forge/linux-64/mathjax-2.7.7-ha770c72_3.tar.bz2 -sha256: 02fef69bde69db264a12f21386612262f545b6e3e68d8f1ccec19f3eaae58edf -md5: 86e69bd82c2a2c6fd29f5ab7e02b3691 -license: Apache-2.0 -license_family: Apache -size: 22281629 -timestamp: 1662784498331 -- conda: https://conda.anaconda.org/conda-forge/noarch/mdurl-0.1.2-pyhd8ed1ab_1.conda -sha256: 78c1bbe1723449c52b7a9df1af2ee5f005209f67e40b6e1d3c7619127c43b1c7 -md5: 592132998493b3ff25fd7479396e8351 -depends: -- python >=3.9 -license: MIT -license_family: MIT -size: 14465 -timestamp: 1733255681319 -- conda: https://conda.anaconda.org/bioconda/noarch/multiqc-1.33-pyhdfd78af_0.conda -sha256: f005760b13093362fc9c997d603dd487de32ab2e821a3cbce52a42bcb8136517 -md5: 698a8a27c2b9d8a542c70cb47099a75e -depends: -- click -- coloredlogs -- humanize -- importlib-metadata -- jinja2 >=3.0.0 -- jsonschema -- markdown -- natsort -- numpy -- packaging -- pillow >=10.2.0 -- plotly >=5.18 -- polars-lts-cpu -- pyaml-env -- pydantic >=2.7.1 -- python >=3.8,!=3.14.1 -- python-dotenv -- python-kaleido 0.2.1 -- pyyaml >=4 -- requests -- rich >=10 -- rich-click -- spectra >=0.0.10 -- tiktoken -- tqdm -- typeguard -license: GPL-3.0-or-later -license_family: GPL3 -size: 4198799 -timestamp: 1765300743879 -- conda: https://conda.anaconda.org/conda-forge/noarch/narwhals-2.18.1-pyhcf101f3_1.conda -sha256: 541fd4390a0687228b8578247f1536a821d9261389a65585af9d1a6f2a14e1e0 -md5: 30bec5e8f4c3969e2b1bd407c5e52afb -depends: -- python >=3.10 -- python -license: MIT -size: 280459 -timestamp: 1774380620329 
-- conda: https://conda.anaconda.org/conda-forge/noarch/natsort-8.4.0-pyhcf101f3_2.conda -sha256: aeb1548eb72e4f198e72f19d242fb695b35add2ac7b2c00e0d83687052867680 -md5: e941e85e273121222580723010bd4fa2 -depends: -- python >=3.9 -- python -license: MIT -license_family: MIT -size: 39262 -timestamp: 1770905275632 -- conda: https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h2d0b736_3.conda -sha256: 3fde293232fa3fca98635e1167de6b7c7fda83caf24b9d6c91ec9eefb4f4d586 -md5: 47e340acb35de30501a76c7c799c41d7 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=13 -license: X11 AND BSD-3-Clause -size: 891641 -timestamp: 1738195959188 -- conda: https://conda.anaconda.org/conda-forge/noarch/networkx-3.6.1-pyhcf101f3_0.conda -sha256: f6a82172afc50e54741f6f84527ef10424326611503c64e359e25a19a8e4c1c6 -md5: a2c1eeadae7a309daed9d62c96012a2b -depends: -- python >=3.11 -- python -constrains: -- numpy >=1.25 -- scipy >=1.11.2 -- matplotlib-base >=3.8 -- pandas >=2.0 -license: BSD-3-Clause -license_family: BSD -size: 1587439 -timestamp: 1765215107045 -- conda: https://conda.anaconda.org/conda-forge/linux-64/nspr-4.38-h29cc59b_0.conda -sha256: e3664264bd936c357523b55c71ed5a30263c6ba278d726a75b1eb112e6fb0b64 -md5: e235d5566c9cc8970eb2798dd4ecf62f -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -- libstdcxx >=14 -license: MPL-2.0 -license_family: MOZILLA -size: 228588 -timestamp: 1762348634537 -- conda: https://conda.anaconda.org/conda-forge/linux-64/nss-3.118-h445c969_0.conda -sha256: 44dd98ffeac859d84a6dcba79a2096193a42fc10b29b28a5115687a680dd6aea -md5: 567fbeed956c200c1db5782a424e58ee -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -- libsqlite >=3.51.0,<4.0a0 -- libstdcxx >=14 -- libzlib >=1.3.1,<2.0a0 -- nspr >=4.38,<5.0a0 -license: MPL-2.0 -license_family: MOZILLA -size: 2057773 -timestamp: 1763485556350 -- conda: https://conda.anaconda.org/conda-forge/linux-64/numpy-2.4.3-py314h2b28147_0.conda -sha256: f2ba8cb0d86a6461a6bcf0d315c80c7076083f72c6733c9290086640723f79ec -md5: 
36f5b7eb328bdc204954a2225cf908e2 -depends: -- python -- libstdcxx >=14 -- libgcc >=14 -- __glibc >=2.17,<3.0.a0 -- python_abi 3.14.* *_cp314 -- libcblas >=3.9.0,<4.0a0 -- liblapack >=3.9.0,<4.0a0 -- libblas >=3.9.0,<4.0a0 -constrains: -- numpy-base <0a0 -license: BSD-3-Clause -license_family: BSD -size: 8927860 -timestamp: 1773839233468 -- conda: https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.4-h55fea9a_0.conda -sha256: 3900f9f2dbbf4129cf3ad6acf4e4b6f7101390b53843591c53b00f034343bc4d -md5: 11b3379b191f63139e29c0d19dee24cd -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -- libpng >=1.6.50,<1.7.0a0 -- libstdcxx >=14 -- libtiff >=4.7.1,<4.8.0a0 -- libzlib >=1.3.1,<2.0a0 -license: BSD-2-Clause -license_family: BSD -size: 355400 -timestamp: 1758489294972 -- conda: https://conda.anaconda.org/conda-forge/linux-64/openssl-3.6.1-h35e630c_1.conda -sha256: 44c877f8af015332a5d12f5ff0fb20ca32f896526a7d0cdb30c769df1144fb5c -md5: f61eb8cd60ff9057122a3d338b99c00f -depends: -- __glibc >=2.17,<3.0.a0 -- ca-certificates -- libgcc >=14 -license: Apache-2.0 -license_family: Apache -size: 3164551 -timestamp: 1769555830639 -- conda: https://conda.anaconda.org/conda-forge/noarch/packaging-26.0-pyhcf101f3_0.conda -sha256: c1fc0f953048f743385d31c468b4a678b3ad20caffdeaa94bed85ba63049fd58 -md5: b76541e68fea4d511b1ac46a28dcd2c6 -depends: -- python >=3.8 -- python -license: Apache-2.0 -license_family: APACHE -size: 72010 -timestamp: 1769093650580 -- conda: https://conda.anaconda.org/conda-forge/linux-64/pillow-12.1.1-py314h8ec4b1a_0.conda -sha256: 9e6ec8f3213e8b7d64b0ad45f84c51a2c9eba4398efda31e196c9a56186133ee -md5: 79678378ae235e24b3aa83cee1b38207 -depends: -- python -- libgcc >=14 -- __glibc >=2.17,<3.0.a0 -- libwebp-base >=1.6.0,<2.0a0 -- zlib-ng >=2.3.3,<2.4.0a0 -- python_abi 3.14.* *_cp314 -- tk >=8.6.13,<8.7.0a0 -- libjpeg-turbo >=3.1.2,<4.0a0 -- libxcb >=1.17.0,<2.0a0 -- openjpeg >=2.5.4,<3.0a0 -- lcms2 >=2.18,<3.0a0 -- libtiff >=4.7.1,<4.8.0a0 -- libfreetype >=2.14.1 
-- libfreetype6 >=2.14.1 -license: HPND -size: 1073026 -timestamp: 1770794002408 -- conda: https://conda.anaconda.org/conda-forge/noarch/plotly-6.6.0-pyhd8ed1ab_0.conda -sha256: c418d325359fc7a0074cea7f081ef1bce26e114d2da8a0154c5d27ecc87a08e7 -md5: 3e9427ee186846052e81fadde8ebe96a -depends: -- narwhals >=1.15.1 -- packaging -- python >=3.10 -constrains: -- ipywidgets >=7.6 -license: MIT -license_family: MIT -size: 5251872 -timestamp: 1772628857717 -- conda: https://conda.anaconda.org/conda-forge/noarch/polars-1.39.3-pyh58ad624_1.conda -sha256: d332c2d5002fc440ae37ed9679ffc21b552f18d20232390005d1dd3bce0888d3 -md5: d5a4e013a30dd8dfde9ab39f45aaf9c1 -depends: -- polars-runtime-32 ==1.39.3 -- python >=3.10 -- python -constrains: -- numpy >=1.16.0 -- pyarrow >=7.0.0 -- fastexcel >=0.9 -- openpyxl >=3.0.0 -- xlsx2csv >=0.8.0 -- connectorx >=0.3.2 -- deltalake >=1.0.0 -- pyiceberg >=0.7.1 -- altair >=5.4.0 -- great_tables >=0.8.0 -- polars-runtime-32 ==1.39.3 -- polars-runtime-64 ==1.39.3 -- polars-runtime-compat ==1.39.3 -license: MIT -license_family: MIT -size: 533495 -timestamp: 1774207987966 -- conda: https://conda.anaconda.org/conda-forge/noarch/polars-lts-cpu-1.34.0.deprecated-hc364b38_0.conda -sha256: e466fb31f67ba9bde18deafeb34263ca5eb25807f39ead0e9d753a8e82c4c4f4 -md5: ef0340e75068ac8ff96462749b5c98e7 -depends: -- polars >=1.34.0 -- polars-runtime-compat >=1.34.0 -license: MIT -license_family: MIT -size: 3902 -timestamp: 1760206808444 -- conda: https://conda.anaconda.org/conda-forge/linux-64/polars-runtime-32-1.39.3-py310hffdcd12_1.conda -noarch: python -sha256: 9744f8086bb0832998f5b01076f57ddc9efbe460e493b14303c3567dc4f401e7 -md5: f9327f9f2cfc4215f55b613e64afd3ba -depends: -- python -- libstdcxx >=14 -- libgcc >=14 -- __glibc >=2.17,<3.0.a0 -- _python_abi3_support 1.* -- cpython >=3.10 -constrains: -- __glibc >=2.17 -license: MIT -license_family: MIT -size: 37570276 -timestamp: 1774207987966 -- conda: 
https://conda.anaconda.org/conda-forge/linux-64/polars-runtime-compat-1.39.3-py310hbcd5346_1.conda -noarch: python -sha256: bf0b932713f0f27924f42159c98426e0073bb6145ed796eaa4cec79ca05363c7 -md5: 4b9b312453eebd6fbdbbe2a88fa1b5c4 -depends: -- python -- libgcc >=14 -- libstdcxx >=14 -- __glibc >=2.17,<3.0.a0 -- _python_abi3_support 1.* -- cpython >=3.10 -constrains: -- __glibc >=2.17 -license: MIT -license_family: MIT -size: 37224264 -timestamp: 1774207985377 -- conda: https://conda.anaconda.org/conda-forge/linux-64/procps-ng-4.0.6-h18c060e_0.conda -sha256: 4ce2e1ee31a6217998f78c31ce7dc0a3e0557d9238b51d49dd20c52d467a126d -md5: f2c23a77b25efcad57d377b34bd84941 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -- ncurses >=6.5,<7.0a0 -license: GPL-2.0-or-later AND LGPL-2.0-or-later -license_family: GPL -size: 593603 -timestamp: 1769710381284 -- conda: https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-hb9d3cd8_1002.conda -sha256: 9c88f8c64590e9567c6c80823f0328e58d3b1efb0e1c539c0315ceca764e0973 -md5: b3c17d95b5a10c6e64a21fa17573e70e -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=13 -license: MIT -license_family: MIT -size: 8252 -timestamp: 1726802366959 -- conda: https://conda.anaconda.org/conda-forge/noarch/pyaml-env-1.2.2-pyhd8ed1ab_0.conda -sha256: 58994e0d2ea8584cb399546e6f6896d771995e6121d1a7b6a2c9948388358932 -md5: e17be1016bcc3516827b836cd3e4d9dc -depends: -- python >=3.9 -- pyyaml >=5.0,<=7.0 -license: MIT -license_family: MIT -size: 14645 -timestamp: 1736766960536 -- conda: https://conda.anaconda.org/conda-forge/noarch/pydantic-2.12.5-pyhcf101f3_1.conda -sha256: 868569d9505b7fe246c880c11e2c44924d7613a8cdcc1f6ef85d5375e892f13d -md5: c3946ed24acdb28db1b5d63321dbca7d -depends: -- typing-inspection >=0.4.2 -- typing_extensions >=4.14.1 -- python >=3.10 -- typing-extensions >=4.6.1 -- annotated-types >=0.6.0 -- pydantic-core ==2.41.5 -- python -license: MIT -license_family: MIT -size: 340482 -timestamp: 1764434463101 -- conda: 
https://conda.anaconda.org/conda-forge/linux-64/pydantic-core-2.41.5-py314h2e6c369_1.conda -sha256: 7e0ae379796e28a429f8e48f2fe22a0f232979d65ec455e91f8dac689247d39f -md5: 432b0716a1dfac69b86aa38fdd59b7e6 -depends: -- python -- typing-extensions >=4.6.0,!=4.7.0 -- libgcc >=14 -- __glibc >=2.17,<3.0.a0 -- python_abi 3.14.* *_cp314 -constrains: -- __glibc >=2.17 -license: MIT -license_family: MIT -size: 1943088 -timestamp: 1762988995556 -- conda: https://conda.anaconda.org/conda-forge/noarch/pygments-2.19.2-pyhd8ed1ab_0.conda -sha256: 5577623b9f6685ece2697c6eb7511b4c9ac5fb607c9babc2646c811b428fd46a -md5: 6b6ece66ebcae2d5f326c77ef2c5a066 -depends: -- python >=3.9 -license: BSD-2-Clause -license_family: BSD -size: 889287 -timestamp: 1750615908735 -- conda: https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha55dd90_7.conda -sha256: ba3b032fa52709ce0d9fd388f63d330a026754587a2f461117cac9ab73d8d0d8 -md5: 461219d1a5bd61342293efa2c0c90eac -depends: -- __unix -- python >=3.9 -license: BSD-3-Clause -license_family: BSD -size: 21085 -timestamp: 1733217331982 -- conda: https://conda.anaconda.org/conda-forge/linux-64/python-3.14.3-h32b2ec7_101_cp314.conda -build_number: 101 -sha256: cb0628c5f1732f889f53a877484da98f5a0e0f47326622671396fb4f2b0cd6bd -md5: c014ad06e60441661737121d3eae8a60 -depends: -- __glibc >=2.17,<3.0.a0 -- bzip2 >=1.0.8,<2.0a0 -- ld_impl_linux-64 >=2.36.1 -- libexpat >=2.7.3,<3.0a0 -- libffi >=3.5.2,<3.6.0a0 -- libgcc >=14 -- liblzma >=5.8.2,<6.0a0 -- libmpdec >=4.0.0,<5.0a0 -- libsqlite >=3.51.2,<4.0a0 -- libuuid >=2.41.3,<3.0a0 -- libzlib >=1.3.1,<2.0a0 -- ncurses >=6.5,<7.0a0 -- openssl >=3.5.5,<4.0a0 -- python_abi 3.14.* *_cp314 -- readline >=8.3,<9.0a0 -- tk >=8.6.13,<8.7.0a0 -- tzdata -- zstd >=1.5.7,<1.6.0a0 -license: Python-2.0 -size: 36702440 -timestamp: 1770675584356 -python_site_packages_path: lib/python3.14/site-packages -- conda: https://conda.anaconda.org/conda-forge/noarch/python-dotenv-1.2.2-pyhcf101f3_0.conda -sha256: 
74e417a768f59f02a242c25e7db0aa796627b5bc8c818863b57786072aeb85e5 -md5: 130584ad9f3a513cdd71b1fdc1244e9c -depends: -- python >=3.10 -license: BSD-3-Clause -license_family: BSD -size: 27848 -timestamp: 1772388605021 -- conda: https://conda.anaconda.org/conda-forge/noarch/python-gil-3.14.3-h4df99d1_101.conda -sha256: 233aebd94c704ac112afefbb29cf4170b7bc606e22958906f2672081bc50638a -md5: 235765e4ea0d0301c75965985163b5a1 -depends: -- cpython 3.14.3.* -- python_abi * *_cp314 -license: Python-2.0 -size: 50062 -timestamp: 1770674497152 -- conda: https://conda.anaconda.org/conda-forge/noarch/python-kaleido-0.2.1-pyhd8ed1ab_0.tar.bz2 -sha256: e17bf63a30aec33432f1ead86e15e9febde9fc40a7f869c0e766be8d2db44170 -md5: 310259a5b03ff02289d7705f39e2b1d2 -depends: -- kaleido-core 0.2.1.* -- python >=3.5 -license: MIT -license_family: MIT -size: 18320 -timestamp: 1615204747600 -- conda: https://conda.anaconda.org/conda-forge/noarch/python_abi-3.14-8_cp314.conda -build_number: 8 -sha256: ad6d2e9ac39751cc0529dd1566a26751a0bf2542adb0c232533d32e176e21db5 -md5: 0539938c55b6b1a59b560e843ad864a4 -constrains: -- python 3.14.* *_cp314 -license: BSD-3-Clause -license_family: BSD -size: 6989 -timestamp: 1752805904792 -- conda: https://conda.anaconda.org/conda-forge/linux-64/pyyaml-6.0.3-py314h67df5f8_1.conda -sha256: b318fb070c7a1f89980ef124b80a0b5ccf3928143708a85e0053cde0169c699d -md5: 2035f68f96be30dc60a5dfd7452c7941 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -- python >=3.14,<3.15.0a0 -- python_abi 3.14.* *_cp314 -- yaml >=0.2.5,<0.3.0a0 -license: MIT -license_family: MIT -size: 202391 -timestamp: 1770223462836 -- conda: https://conda.anaconda.org/conda-forge/linux-64/readline-8.3-h853b02a_0.conda -sha256: 12ffde5a6f958e285aa22c191ca01bbd3d6e710aa852e00618fa6ddc59149002 -md5: d7d95fc8287ea7bf33e0e7116d2b95ec -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -- ncurses >=6.5,<7.0a0 -license: GPL-3.0-only -license_family: GPL -size: 345073 -timestamp: 1765813471974 -- conda: 
https://conda.anaconda.org/conda-forge/noarch/referencing-0.37.0-pyhcf101f3_0.conda -sha256: 0577eedfb347ff94d0f2fa6c052c502989b028216996b45c7f21236f25864414 -md5: 870293df500ca7e18bedefa5838a22ab -depends: -- attrs >=22.2.0 -- python >=3.10 -- rpds-py >=0.7.0 -- typing_extensions >=4.4.0 -- python -license: MIT -license_family: MIT -size: 51788 -timestamp: 1760379115194 -- conda: https://conda.anaconda.org/conda-forge/linux-64/regex-2026.2.28-py314h5bd0f2a_0.conda -sha256: e085e336f1446f5263a3ec9747df8c719b6996753901181add50dc4fdd8bb2e8 -md5: 3c8b6a8c4d0ff5a264e9831eac4941f4 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -- python >=3.14,<3.15.0a0 -- python_abi 3.14.* *_cp314 -license: Apache-2.0 AND CNRI-Python -license_family: PSF -size: 411924 -timestamp: 1772255161535 -- conda: https://conda.anaconda.org/conda-forge/noarch/requests-2.32.5-pyhcf101f3_1.conda -sha256: 7813c38b79ae549504b2c57b3f33394cea4f2ad083f0994d2045c2e24cb538c5 -md5: c65df89a0b2e321045a9e01d1337b182 -depends: -- python >=3.10 -- certifi >=2017.4.17 -- charset-normalizer >=2,<4 -- idna >=2.5,<4 -- urllib3 >=1.21.1,<3 -- python -constrains: -- chardet >=3.0.2,<6 -license: Apache-2.0 -license_family: APACHE -size: 63602 -timestamp: 1766926974520 -- conda: https://conda.anaconda.org/conda-forge/noarch/rich-14.3.3-pyhcf101f3_0.conda -sha256: b06ce84d6a10c266811a7d3adbfa1c11f13393b91cc6f8a5b468277d90be9590 -md5: 7a6289c50631d620652f5045a63eb573 -depends: -- markdown-it-py >=2.2.0 -- pygments >=2.13.0,<3.0.0 -- python >=3.10 -- typing_extensions >=4.0.0,<5.0.0 -- python -license: MIT -license_family: MIT -size: 208472 -timestamp: 1771572730357 -- conda: https://conda.anaconda.org/conda-forge/noarch/rich-click-1.9.7-pyh8f84b5b_0.conda -sha256: aa3fcb167321bae51998de2e94d199109c9024f25a5a063cb1c28d8f1af33436 -md5: 0c20a8ebcddb24a45da89d5e917e6cb9 -depends: -- python >=3.10 -- rich >=12 -- click >=8 -- typing-extensions >=4 -- __unix -- python -license: MIT -license_family: MIT -size: 64356 
-timestamp: 1769850479089 -- conda: https://conda.anaconda.org/conda-forge/linux-64/rpds-py-0.30.0-py314h2e6c369_0.conda -sha256: e53b0cbf3b324eaa03ca1fe1a688fdf4ab42cea9c25270b0a7307d8aaaa4f446 -md5: c1c368b5437b0d1a68f372ccf01cb133 -depends: -- python -- libgcc >=14 -- __glibc >=2.17,<3.0.a0 -- python_abi 3.14.* *_cp314 -constrains: -- __glibc >=2.17 -license: MIT -license_family: MIT -size: 376121 -timestamp: 1764543122774 -- conda: https://conda.anaconda.org/conda-forge/noarch/spectra-0.0.11-pyhd8ed1ab_2.conda -sha256: 7c65782d2511738e62c70462e89d65da4fa54d5a7e47c46667bcd27a59f81876 -md5: 472239e4eb7b5a84bb96b3ed7e3a596a -depends: -- colormath >=3.0.0 -- python >=3.9 -license: MIT -license_family: MIT -size: 22284 -timestamp: 1735770589188 -- conda: https://conda.anaconda.org/conda-forge/linux-64/sqlite-3.52.0-h04a0ce9_0.conda -sha256: c9af81e7830d9c4b67a7f48e512d060df2676b29cac59e3b31f09dbfcee29c58 -md5: 7d9d7efe9541d4bb71b5934e8ee348ea -depends: -- __glibc >=2.17,<3.0.a0 -- icu >=78.2,<79.0a0 -- libgcc >=14 -- libsqlite 3.52.0 hf4e2dac_0 -- libzlib >=1.3.1,<2.0a0 -- ncurses >=6.5,<7.0a0 -- readline >=8.3,<9.0a0 -license: blessing -size: 203641 -timestamp: 1772818888368 -- conda: https://conda.anaconda.org/conda-forge/linux-64/tiktoken-0.12.0-py314h67fec18_3.conda -sha256: 7e395d67fd249d901beb1ae269057763c0d8c3ee5f7a348694bdb16d158a37d9 -md5: d705f9d8a1185a2b01cced191177a028 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -- libstdcxx >=14 -- python >=3.14,<3.15.0a0 -- python_abi 3.14.* *_cp314 -- regex >=2022.1.18 -- requests >=2.26.0 -constrains: -- __glibc >=2.17 -license: MIT -license_family: MIT -size: 939648 -timestamp: 1764028306357 -- conda: https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h366c992_103.conda -sha256: cafeec44494f842ffeca27e9c8b0c27ed714f93ac77ddadc6aaf726b5554ebac -md5: cffd3bdd58090148f4cfcd831f4b26ab -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -- libzlib >=1.3.1,<2.0a0 -constrains: -- xorg-libx11 
>=1.8.12,<2.0a0 -license: TCL -license_family: BSD -size: 3301196 -timestamp: 1769460227866 -- conda: https://conda.anaconda.org/conda-forge/noarch/tqdm-4.67.3-pyh8f84b5b_0.conda -sha256: 9ef8e47cf00e4d6dcc114eb32a1504cc18206300572ef14d76634ba29dfe1eb6 -md5: e5ce43272193b38c2e9037446c1d9206 -depends: -- python >=3.10 -- __unix -- python -license: MPL-2.0 and MIT -size: 94132 -timestamp: 1770153424136 -- conda: https://conda.anaconda.org/conda-forge/noarch/typeguard-4.5.1-pyhd8ed1ab_0.conda -sha256: 39d8ae33c43cdb8f771373e149b0b4fae5a08960ac58dcca95b2f1642bb17448 -md5: 260af1b0a94f719de76b4e14094e9a3b -depends: -- importlib-metadata >=3.6 -- python >=3.10 -- typing-extensions >=4.10.0 -- typing_extensions >=4.14.0 -constrains: -- pytest >=7 -license: MIT -license_family: MIT -size: 36838 -timestamp: 1771532971545 -- conda: https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.15.0-h396c80c_0.conda -sha256: 7c2df5721c742c2a47b2c8f960e718c930031663ac1174da67c1ed5999f7938c -md5: edd329d7d3a4ab45dcf905899a7a6115 -depends: -- typing_extensions ==4.15.0 pyhcf101f3_0 -license: PSF-2.0 -license_family: PSF -size: 91383 -timestamp: 1756220668932 -- conda: https://conda.anaconda.org/conda-forge/noarch/typing-inspection-0.4.2-pyhd8ed1ab_1.conda -sha256: 70db27de58a97aeb7ba7448366c9853f91b21137492e0b4430251a1870aa8ff4 -md5: a0a4a3035667fc34f29bfbd5c190baa6 -depends: -- python >=3.10 -- typing_extensions >=4.12.0 -license: MIT -license_family: MIT -size: 18923 -timestamp: 1764158430324 -- conda: https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.15.0-pyhcf101f3_0.conda -sha256: 032271135bca55aeb156cee361c81350c6f3fb203f57d024d7e5a1fc9ef18731 -md5: 0caa1af407ecff61170c9437a808404d -depends: -- python >=3.10 -- python -license: PSF-2.0 -license_family: PSF -size: 51692 -timestamp: 1756220668932 -- conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2025c-hc9c84f9_1.conda -sha256: 1d30098909076af33a35017eed6f2953af1c769e273a0626a04722ac4acaba3c 
-md5: ad659d0a2b3e47e38d829aa8cad2d610 -license: LicenseRef-Public-Domain -size: 119135 -timestamp: 1767016325805 -- conda: https://conda.anaconda.org/conda-forge/noarch/urllib3-2.6.3-pyhd8ed1ab_0.conda -sha256: af641ca7ab0c64525a96fd9ad3081b0f5bcf5d1cbb091afb3f6ed5a9eee6111a -md5: 9272daa869e03efe68833e3dc7a02130 -depends: -- backports.zstd >=1.0.0 -- brotli-python >=1.2.0 -- h2 >=4,<5 -- pysocks >=1.5.6,<2.0,!=1.5.7 -- python >=3.10 -license: MIT -license_family: MIT -size: 103172 -timestamp: 1767817860341 -- conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.12-hb03c661_1.conda -sha256: 6bc6ab7a90a5d8ac94c7e300cc10beb0500eeba4b99822768ca2f2ef356f731b -md5: b2895afaf55bf96a8c8282a2e47a5de0 -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -license: MIT -license_family: MIT -size: 15321 -timestamp: 1762976464266 -- conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.5-hb03c661_1.conda -sha256: 25d255fb2eef929d21ff660a0c687d38a6d2ccfbcbf0cc6aa738b12af6e9d142 -md5: 1dafce8548e38671bea82e3f5c6ce22f -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -license: MIT -license_family: MIT -size: 20591 -timestamp: 1762976546182 -- conda: https://conda.anaconda.org/conda-forge/linux-64/yaml-0.2.5-h280c20c_3.conda -sha256: 6d9ea2f731e284e9316d95fa61869fe7bbba33df7929f82693c121022810f4ad -md5: a77f85f77be52ff59391544bfe73390a -depends: -- libgcc >=14 -- __glibc >=2.17,<3.0.a0 -license: MIT -license_family: MIT -size: 85189 -timestamp: 1753484064210 -- conda: https://conda.anaconda.org/conda-forge/noarch/zipp-3.23.0-pyhcf101f3_1.conda -sha256: b4533f7d9efc976511a73ef7d4a2473406d7f4c750884be8e8620b0ce70f4dae -md5: 30cd29cb87d819caead4d55184c1d115 -depends: -- python >=3.10 -- python -license: MIT -license_family: MIT -size: 24194 -timestamp: 1764460141901 -- conda: https://conda.anaconda.org/conda-forge/linux-64/zlib-ng-2.3.3-hceb46e0_1.conda -sha256: ea4e50c465d70236408cb0bfe0115609fd14db1adcd8bd30d8918e0291f8a75f -md5: 
2aadb0d17215603a82a2a6b0afd9a4cb -depends: -- __glibc >=2.17,<3.0.a0 -- libgcc >=14 -- libstdcxx >=14 -license: Zlib -license_family: Other -size: 122618 -timestamp: 1770167931827 -- conda: https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb78ec9c_6.conda -sha256: 68f0206ca6e98fea941e5717cec780ed2873ffabc0e1ed34428c061e2c6268c7 -md5: 4a13eeac0b5c8e5b8ab496e6c4ddd829 -depends: -- __glibc >=2.17,<3.0.a0 -- libzlib >=1.3.1,<2.0a0 -license: BSD-3-Clause -license_family: BSD -size: 601375 -timestamp: 1764777111296 diff --git a/modules/nf-core/multiqc/.conda-lock/linux_amd64-bd-db7c73dae76bc9e6_1.txt b/modules/nf-core/multiqc/.conda-lock/linux_amd64-bd-db7c73dae76bc9e6_1.txt deleted file mode 100644 index a55a4d49..00000000 --- a/modules/nf-core/multiqc/.conda-lock/linux_amd64-bd-db7c73dae76bc9e6_1.txt +++ /dev/null @@ -1,126 +0,0 @@ - -# This file may be used to create an environment using: -# $ conda create --name --file -# platform: linux-64 -@EXPLICIT -https://conda.anaconda.org/conda-forge/linux-64/libgomp-15.2.0-he0feb66_18.conda#239c5e9546c38a1e884d69effcf4c882 -https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-20_gnu.conda#a9f577daf3de00bca7c3c76c0ecbd1de -https://conda.anaconda.org/conda-forge/linux-64/libgcc-15.2.0-he0feb66_18.conda#0aa00f03f9e39fb9876085dee11a85d4 -https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hda65f42_9.conda#d2ffd7602c02f2b316fd921d39876885 -https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.2-h25fd6f3_2.conda#d87ff7921124eccd67248aa483c23fec -https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb78ec9c_6.conda#4a13eeac0b5c8e5b8ab496e6c4ddd829 -https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.45.1-default_hbd61a6d_102.conda#18335a698559cdbcd86150a48bf54ba6 -https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.5-hecca717_0.conda#49f570f3bc4c874a06ea69b7225753af 
-https://conda.anaconda.org/conda-forge/linux-64/libffi-3.5.2-h3435931_0.conda#a360c33a5abe61c07959e449fa1453eb -https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.3-hb03c661_0.conda#b88d90cad08e6bc8ad540cb310a761fb -https://conda.anaconda.org/conda-forge/linux-64/libmpdec-4.0.0-hb03c661_1.conda#2c21e66f50753a083cbe6b80f38268fa -https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-15.2.0-h934c35e_18.conda#1b08cd684f34175e4514474793d44bcb -https://conda.anaconda.org/conda-forge/linux-64/icu-78.3-h33c6efd_0.conda#c80d8a3b84358cb967fa81e7075fbc8a -https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.53.0-hf4e2dac_0.conda#810d83373448da85c3f673fbcb7ad3a3 -https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.42-h5347b49_0.conda#38ffe67b78c9d4de527be8315e5ada2c -https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h2d0b736_3.conda#47e340acb35de30501a76c7c799c41d7 -https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2026.4.22-hbd8a1cb_0.conda#e18ad67cf881dcadee8b8d9e2f8e5f73 -https://conda.anaconda.org/conda-forge/linux-64/openssl-3.6.2-h35e630c_0.conda#da1b85b6a87e141f5140bb9924cecab0 -https://conda.anaconda.org/conda-forge/noarch/python_abi-3.14-8_cp314.conda#0539938c55b6b1a59b560e843ad864a4 -https://conda.anaconda.org/conda-forge/linux-64/readline-8.3-h853b02a_0.conda#d7d95fc8287ea7bf33e0e7116d2b95ec -https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h366c992_103.conda#cffd3bdd58090148f4cfcd831f4b26ab -https://conda.anaconda.org/conda-forge/noarch/tzdata-2025c-hc9c84f9_1.conda#ad659d0a2b3e47e38d829aa8cad2d610 -https://conda.anaconda.org/conda-forge/linux-64/python-3.14.4-habeac84_100_cp314.conda#a443f87920815d41bfe611296e507995 -https://conda.anaconda.org/conda-forge/noarch/cpython-3.14.4-py314hd8ed1ab_100.conda#f111d4cfaf1fe9496f386bc98ae94452 -https://conda.anaconda.org/conda-forge/noarch/python-gil-3.14.4-h4df99d1_100.conda#e4e60721757979d01d3964122f674959 
-https://conda.anaconda.org/conda-forge/noarch/_python_abi3_support-1.0-hd8ed1ab_2.conda#aaa2a381ccc56eac91d63b6c1240312f -https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.15.0-pyhcf101f3_0.conda#0caa1af407ecff61170c9437a808404d -https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.15.0-h396c80c_0.conda#edd329d7d3a4ab45dcf905899a7a6115 -https://conda.anaconda.org/conda-forge/noarch/annotated-types-0.7.0-pyhd8ed1ab_1.conda#2934f256a8acfe48f6ebb4fce6cde29c -https://conda.anaconda.org/conda-forge/noarch/attrs-26.1.0-pyhcf101f3_0.conda#c6b0543676ecb1fb2d7643941fe375f2 -https://conda.anaconda.org/conda-forge/noarch/backports.zstd-1.3.0-py314h680f03e_0.conda#a2ac7763a9ac75055b68f325d3255265 -https://conda.anaconda.org/conda-forge/linux-64/brotli-python-1.2.0-py314h3de4e8d_1.conda#8910d2c46f7e7b519129f486e0fe927a -https://conda.anaconda.org/conda-forge/noarch/certifi-2026.4.22-pyhd8ed1ab_0.conda#929471569c93acefb30282a22060dcd5 -https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.4.7-pyhd8ed1ab_0.conda#a9167b9571f3baa9d448faa2139d1089 -https://conda.anaconda.org/conda-forge/noarch/click-8.3.2-pyhc90fa1f_0.conda#4d18bc3af7cfcea97bd817164672a08c -https://conda.anaconda.org/conda-forge/noarch/humanfriendly-10.0-pyh707e725_8.conda#7fe569c10905402ed47024fc481bb371 -https://conda.anaconda.org/conda-forge/noarch/coloredlogs-15.0.1-pyhd8ed1ab_4.conda#b866ff7007b934d564961066c8195983 -https://conda.anaconda.org/conda-forge/noarch/networkx-3.6.1-pyhcf101f3_0.conda#a2c1eeadae7a309daed9d62c96012a2b -https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-15.2.0-h68bc16d_18.conda#646855f357199a12f02a87382d429b75 -https://conda.anaconda.org/conda-forge/linux-64/libgfortran-15.2.0-h69a702a_18.conda#9063115da5bc35fdc3e1002e69b9ef6e -https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.32-pthreads_h94d23a6_0.conda#89d61bc91d3f39fda0ca10fcd3c68594 
-https://conda.anaconda.org/conda-forge/linux-64/libblas-3.11.0-6_h4a7cf45_openblas.conda#6d6d225559bfa6e2f3c90ee9c03d4e2e -https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.11.0-6_h0358290_openblas.conda#36ae340a916635b97ac8a0655ace2a35 -https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.11.0-6_h47877c9_openblas.conda#881d801569b201c2e753f03c84b85e15 -https://conda.anaconda.org/conda-forge/linux-64/numpy-2.4.3-py314h2b28147_0.conda#36f5b7eb328bdc204954a2225cf908e2 -https://conda.anaconda.org/conda-forge/noarch/colormath-3.0.0-pyhd8ed1ab_4.conda#071cf7b0ce333c81718b054066c15102 -https://conda.anaconda.org/conda-forge/linux-64/expat-2.7.5-hecca717_0.conda#7de50d165039df32d38be74c1b34a910 -https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45 -https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6 -https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb -https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_3.conda#49023d73832ef61042f6a237cb2687e7 -https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.58-h421ea60_0.conda#eba48a68a1a2b9d3c0d9511548db85db -https://conda.anaconda.org/conda-forge/linux-64/libfreetype6-2.14.3-h73754d4_0.conda#fb16b4b69e3f1dcfe79d80db8fd0c55d -https://conda.anaconda.org/conda-forge/linux-64/libfreetype-2.14.3-ha770c72_0.conda#e289f3d17880e44b633ba911d57a321b -https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.17.1-h27c8c51_0.conda#867127763fbe935bab59815b6e0b7b5c -https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-hc364b38_1.conda#a7970cd949a077b7cb9696379d338681 -https://conda.anaconda.org/conda-forge/noarch/hpack-4.1.0-pyhd8ed1ab_0.conda#0a802cb9888dd14eeefc611f05c40b6e 
-https://conda.anaconda.org/conda-forge/noarch/hyperframe-6.1.0-pyhd8ed1ab_0.conda#8e6923fc12f1fe8f8c4e5c9f343256ac -https://conda.anaconda.org/conda-forge/noarch/h2-4.3.0-pyhcf101f3_0.conda#164fc43f0b53b6e3a7bc7dce5e4f1dc9 -https://conda.anaconda.org/conda-forge/noarch/humanize-4.15.0-pyhd8ed1ab_0.conda#daddf757c3ecd6067b9af1df1f25d89e -https://conda.anaconda.org/conda-forge/noarch/idna-3.13-pyhcf101f3_0.conda#fb7130c190f9b4ec91219840a05ba3ac -https://conda.anaconda.org/conda-forge/noarch/zipp-3.23.1-pyhcf101f3_0.conda#e1c36c6121a7c9c76f2f148f1e83b983 -https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-8.8.0-pyhcf101f3_0.conda#080594bf4493e6bae2607e65390c520a -https://conda.anaconda.org/conda-forge/linux-64/markupsafe-3.0.3-py314h67df5f8_1.conda#9a17c4307d23318476d7fbf0fedc0cde -https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.6-pyhcf101f3_1.conda#04558c96691bed63104678757beb4f8d -https://conda.anaconda.org/conda-forge/linux-64/rpds-py-0.30.0-py314h2e6c369_0.conda#c1c368b5437b0d1a68f372ccf01cb133 -https://conda.anaconda.org/conda-forge/noarch/referencing-0.37.0-pyhcf101f3_0.conda#870293df500ca7e18bedefa5838a22ab -https://conda.anaconda.org/conda-forge/noarch/jsonschema-specifications-2025.9.1-pyhcf101f3_0.conda#439cd0f567d697b20a8f45cb70a1005a -https://conda.anaconda.org/conda-forge/noarch/jsonschema-4.26.0-pyhcf101f3_0.conda#ada41c863af263cc4c5fcbaff7c3e4dc -https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-15.2.0-h69a702a_18.conda#d5e96b1ed75ca01906b3d2469b4ce493 -https://conda.anaconda.org/conda-forge/linux-64/mathjax-2.7.7-ha770c72_3.tar.bz2#86e69bd82c2a2c6fd29f5ab7e02b3691 -https://conda.anaconda.org/conda-forge/linux-64/nspr-4.38-h29cc59b_0.conda#e235d5566c9cc8970eb2798dd4ecf62f -https://conda.anaconda.org/conda-forge/linux-64/nss-3.118-h445c969_0.conda#567fbeed956c200c1db5782a424e58ee -https://conda.anaconda.org/conda-forge/linux-64/sqlite-3.53.0-h04a0ce9_0.conda#dc540e5bd5616d83a1ec46af8315ff98 
-https://conda.anaconda.org/conda-forge/linux-64/kaleido-core-0.2.1-h3644ca4_0.tar.bz2#b3723b235b0758abaae8c82ce4d80146 -https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.1.4.1-hb03c661_0.conda#6178c6f2fb254558238ef4e6c56fb782 -https://conda.anaconda.org/conda-forge/linux-64/lerc-4.1.0-hdb68285_0.conda#a752488c68f2e7c456bcbd8f16eec275 -https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.25-h17f619e_0.conda#6c77a605a7a689d17d4819c0f8ac9a00 -https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.6.0-hd42ef1d_0.conda#aea31d2e5b1091feca96fcfe945c3cf9 -https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.7.1-h9d88235_1.conda#cd5a90476766d53e901500df9215e927 -https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.18-h0c24ade_0.conda#6f2e2c8f58160147c4d1c6f4c14cbac4 -https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-hb9d3cd8_1002.conda#b3c17d95b5a10c6e64a21fa17573e70e -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.12-hb03c661_1.conda#b2895afaf55bf96a8c8282a2e47a5de0 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.5-hb03c661_1.conda#1dafce8548e38671bea82e3f5c6ce22f -https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.17.0-h8a09558_0.conda#92ed62436b625154323d40d5f2f11dd7 -https://conda.anaconda.org/conda-forge/noarch/markdown-3.10.2-pyhcf101f3_0.conda#ba0a9221ce1063f31692c07370d062f3 -https://conda.anaconda.org/conda-forge/noarch/mdurl-0.1.2-pyhd8ed1ab_1.conda#592132998493b3ff25fd7479396e8351 -https://conda.anaconda.org/conda-forge/noarch/markdown-it-py-4.0.0-pyhd8ed1ab_0.conda#5b5203189eb668f042ac2b0826244964 -https://conda.anaconda.org/conda-forge/noarch/natsort-8.4.0-pyhcf101f3_2.conda#e941e85e273121222580723010bd4fa2 -https://conda.anaconda.org/conda-forge/noarch/packaging-26.1-pyhc364b38_0.conda#b8ae38639d323d808da535fb71e31be8 -https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.4-h55fea9a_0.conda#11b3379b191f63139e29c0d19dee24cd 
-https://conda.anaconda.org/conda-forge/linux-64/zlib-ng-2.3.3-hceb46e0_1.conda#2aadb0d17215603a82a2a6b0afd9a4cb -https://conda.anaconda.org/conda-forge/linux-64/pillow-12.2.0-py314h8ec4b1a_0.conda#76c4757c0ec9d11f969e8eb44899307b -https://conda.anaconda.org/conda-forge/noarch/narwhals-2.20.0-pyhcf101f3_0.conda#6cac1a50359219d786453c6fef819f98 -https://conda.anaconda.org/conda-forge/noarch/plotly-6.6.0-pyhd8ed1ab_0.conda#3e9427ee186846052e81fadde8ebe96a -https://conda.anaconda.org/conda-forge/linux-64/polars-runtime-32-1.40.0-py310hffdcd12_0.conda#8eacf9ff4d4e1ca1b52f8f3ba3e0c993 -https://conda.anaconda.org/conda-forge/noarch/polars-1.40.0-pyh58ad624_0.conda#fd16be490f5403adfbf27dd4901bbe34 -https://conda.anaconda.org/conda-forge/linux-64/polars-runtime-compat-1.40.0-py310hbcd5346_0.conda#03a6899e17bb731c8e21b08212f1a64c -https://conda.anaconda.org/conda-forge/noarch/polars-lts-cpu-1.34.0.deprecated-hc364b38_0.conda#ef0340e75068ac8ff96462749b5c98e7 -https://conda.anaconda.org/conda-forge/linux-64/yaml-0.2.5-h280c20c_3.conda#a77f85f77be52ff59391544bfe73390a -https://conda.anaconda.org/conda-forge/linux-64/pyyaml-6.0.3-py314h67df5f8_1.conda#2035f68f96be30dc60a5dfd7452c7941 -https://conda.anaconda.org/conda-forge/noarch/pyaml-env-1.2.2-pyhd8ed1ab_0.conda#e17be1016bcc3516827b836cd3e4d9dc -https://conda.anaconda.org/conda-forge/linux-64/pydantic-core-2.46.3-py314h2e6c369_0.conda#1f3fd537f929b8d3236f9f0f0e7f7a32 -https://conda.anaconda.org/conda-forge/noarch/typing-inspection-0.4.2-pyhd8ed1ab_1.conda#a0a4a3035667fc34f29bfbd5c190baa6 -https://conda.anaconda.org/conda-forge/noarch/pydantic-2.13.3-pyhcf101f3_0.conda#f690e6f204efd2e5c06b57518a383d98 -https://conda.anaconda.org/conda-forge/noarch/python-dotenv-1.2.2-pyhcf101f3_0.conda#130584ad9f3a513cdd71b1fdc1244e9c -https://conda.anaconda.org/conda-forge/noarch/python-kaleido-0.2.1-pyhd8ed1ab_0.tar.bz2#310259a5b03ff02289d7705f39e2b1d2 
-https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha55dd90_7.conda#461219d1a5bd61342293efa2c0c90eac -https://conda.anaconda.org/conda-forge/noarch/urllib3-2.6.3-pyhd8ed1ab_0.conda#9272daa869e03efe68833e3dc7a02130 -https://conda.anaconda.org/conda-forge/noarch/requests-2.33.1-pyhcf101f3_0.conda#10afbb4dbf06ff959ad25a92ccee6e59 -https://conda.anaconda.org/conda-forge/noarch/pygments-2.20.0-pyhd8ed1ab_0.conda#16c18772b340887160c79a6acc022db0 -https://conda.anaconda.org/conda-forge/noarch/rich-15.0.0-pyhcf101f3_0.conda#0242025a3c804966bf71aa04eee82f66 -https://conda.anaconda.org/conda-forge/noarch/rich-click-1.9.7-pyh8f84b5b_0.conda#0c20a8ebcddb24a45da89d5e917e6cb9 -https://conda.anaconda.org/conda-forge/noarch/spectra-0.0.11-pyhd8ed1ab_2.conda#472239e4eb7b5a84bb96b3ed7e3a596a -https://conda.anaconda.org/conda-forge/linux-64/regex-2026.4.4-py314h5bd0f2a_0.conda#4ffb42385183c854564f1f9adcf80a63 -https://conda.anaconda.org/conda-forge/linux-64/tiktoken-0.12.0-py314h67fec18_3.conda#d705f9d8a1185a2b01cced191177a028 -https://conda.anaconda.org/conda-forge/noarch/tqdm-4.67.3-pyh8f84b5b_0.conda#e5ce43272193b38c2e9037446c1d9206 -https://conda.anaconda.org/conda-forge/noarch/typeguard-4.5.1-pyhd8ed1ab_0.conda#260af1b0a94f719de76b4e14094e9a3b -https://conda.anaconda.org/bioconda/noarch/multiqc-1.34-pyhdfd78af_0.conda#a7111ab9a6a6146b40cbce16655ac873 -https://conda.anaconda.org/conda-forge/noarch/pip-26.0.1-pyh145f28c_0.conda#09a970fbf75e8ed1aa633827ded6aa4f -https://conda.anaconda.org/conda-forge/linux-64/procps-ng-4.0.6-h18c060e_0.conda#f2c23a77b25efcad57d377b34bd84941 diff --git a/modules/nf-core/multiqc/.conda-lock/linux_arm64-bd-40bf3b435e89dc22_1.txt b/modules/nf-core/multiqc/.conda-lock/linux_arm64-bd-40bf3b435e89dc22_1.txt deleted file mode 100644 index a58231a0..00000000 --- a/modules/nf-core/multiqc/.conda-lock/linux_arm64-bd-40bf3b435e89dc22_1.txt +++ /dev/null @@ -1,1502 +0,0 @@ - -version: 6 -environments: -default: -channels: -- url: 
https://conda.anaconda.org/conda-forge/ -- url: https://conda.anaconda.org/bioconda/ -- url: https://conda.anaconda.org/bioconda/ -options: -pypi-prerelease-mode: if-necessary-or-explicit -packages: -linux-aarch64: -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/_openmp_mutex-4.5-20_gnu.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/_python_abi3_support-1.0-hd8ed1ab_2.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/annotated-types-0.7.0-pyhd8ed1ab_1.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/attrs-26.1.0-pyhcf101f3_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/backports.zstd-1.3.0-py314h680f03e_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/brotli-python-1.2.0-py314h352cb57_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/bzip2-1.0.8-h4777abc_9.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2026.2.25-hbd8a1cb_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/certifi-2026.2.25-pyhd8ed1ab_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.4.6-pyhd8ed1ab_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/click-8.3.1-pyh8f84b5b_1.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/coloredlogs-15.0.1-pyhd8ed1ab_4.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/colormath-3.0.0-pyhd8ed1ab_4.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/cpython-3.14.3-py314hd8ed1ab_101.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/expat-2.7.4-hfae3067_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2 -- conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2 -- conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2 -- conda: 
https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_3.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fontconfig-2.17.1-hba86a56_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-hc364b38_1.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/h2-4.3.0-pyhcf101f3_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/hpack-4.1.0-pyhd8ed1ab_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/humanfriendly-10.0-pyh707e725_8.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/humanize-4.15.0-pyhd8ed1ab_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/hyperframe-6.1.0-pyhd8ed1ab_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/icu-78.3-hcab7f73_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/idna-3.11-pyhd8ed1ab_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-8.8.0-pyhcf101f3_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.6-pyhcf101f3_1.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/jsonschema-4.26.0-pyhcf101f3_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/jsonschema-specifications-2025.9.1-pyhcf101f3_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/kaleido-core-0.2.1-he5a581e_0.tar.bz2 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/lcms2-2.18-h9d5b58d_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/ld_impl_linux-aarch64-2.45.1-default_h1979696_102.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/lerc-4.1.0-h52b7260_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libblas-3.11.0-5_haddc8a3_openblas.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcblas-3.11.0-5_hd72aa62_openblas.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libdeflate-1.25-h1af38f5_0.conda -- 
conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libexpat-2.7.4-hfae3067_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libffi-3.5.2-h376a255_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libfreetype-2.14.3-h8af1aa0_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libfreetype6-2.14.3-hdae7a39_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-15.2.0-h8acb6b2_18.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-ng-15.2.0-he9431aa_18.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgfortran-15.2.0-he9431aa_18.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgfortran5-15.2.0-h1b7bec0_18.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgomp-15.2.0-h8acb6b2_18.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libjpeg-turbo-3.1.2-he30d5cf_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/liblapack-3.11.0-5_h88aeb00_openblas.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/liblzma-5.8.2-he30d5cf_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libmpdec-4.0.0-he30d5cf_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libopenblas-0.3.30-pthreads_h9d3fd7e_4.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libpng-1.6.55-h1abf092_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libsqlite-3.52.0-h10b116e_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libstdcxx-15.2.0-hef695bb_18.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libtiff-4.7.1-hdb009f0_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libuuid-2.41.3-h1022ec0_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libwebp-base-1.6.0-ha2e29f5_0.conda -- conda: 
https://conda.anaconda.org/conda-forge/linux-aarch64/libxcb-1.17.0-h262b8f6_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libzlib-1.3.2-hdc9db2a_2.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/markdown-3.10.2-pyhcf101f3_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/markdown-it-py-4.0.0-pyhd8ed1ab_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/markupsafe-3.0.3-py314hb76de3f_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/mathjax-2.7.7-h8af1aa0_3.tar.bz2 -- conda: https://conda.anaconda.org/conda-forge/noarch/mdurl-0.1.2-pyhd8ed1ab_1.conda -- conda: https://conda.anaconda.org/bioconda/noarch/multiqc-1.33-pyhdfd78af_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/narwhals-2.18.1-pyhcf101f3_1.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/natsort-8.4.0-pyhcf101f3_2.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/ncurses-6.5-ha32ae93_3.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/networkx-3.6.1-pyhcf101f3_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/nspr-4.38-h3ad9384_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/nss-3.118-h544fa81_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/numpy-2.4.3-py314haac167e_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/openjpeg-2.5.4-h5da879a_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/openssl-3.6.1-h546c87b_1.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/packaging-26.0-pyhcf101f3_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/pillow-12.1.1-py314hac3e5ec_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/plotly-6.6.0-pyhd8ed1ab_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/polars-1.39.3-pyh58ad624_1.conda -- conda: 
https://conda.anaconda.org/conda-forge/noarch/polars-lts-cpu-1.34.0.deprecated-hc364b38_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/polars-runtime-32-1.39.3-py310hff09b76_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/polars-runtime-compat-1.39.3-py310hf00a4a2_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/procps-ng-4.0.6-h1779866_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/pthread-stubs-0.4-h86ecc28_1002.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/pyaml-env-1.2.2-pyhd8ed1ab_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/pydantic-2.12.5-pyhcf101f3_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/pydantic-core-2.41.5-py314h451b6cc_1.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/pygments-2.19.2-pyhd8ed1ab_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha55dd90_7.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/python-3.14.3-hb06a95a_101_cp314.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/python-dotenv-1.2.2-pyhcf101f3_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/python-gil-3.14.3-h4df99d1_101.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/python-kaleido-0.2.1-pyhd8ed1ab_0.tar.bz2 -- conda: https://conda.anaconda.org/conda-forge/noarch/python_abi-3.14-8_cp314.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/pyyaml-6.0.3-py314h807365f_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/readline-8.3-hb682ff5_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/referencing-0.37.0-pyhcf101f3_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/regex-2026.2.28-py314h51f160d_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/requests-2.32.5-pyhcf101f3_1.conda -- conda: 
https://conda.anaconda.org/conda-forge/noarch/rich-14.3.3-pyhcf101f3_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/rich-click-1.9.7-pyh8f84b5b_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/rpds-py-0.30.0-py314h02b7a91_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/spectra-0.0.11-pyhd8ed1ab_2.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/sqlite-3.52.0-hf1c7be2_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/tiktoken-0.12.0-py314h6a36e60_3.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/tk-8.6.13-noxft_h0dc03b3_103.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/tqdm-4.67.3-pyh8f84b5b_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/typeguard-4.5.1-pyhd8ed1ab_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.15.0-h396c80c_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/typing-inspection-0.4.2-pyhd8ed1ab_1.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.15.0-pyhcf101f3_0.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2025c-hc9c84f9_1.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/urllib3-2.6.3-pyhd8ed1ab_0.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxau-1.0.12-he30d5cf_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxdmcp-1.1.5-he30d5cf_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/yaml-0.2.5-h80f16a2_3.conda -- conda: https://conda.anaconda.org/conda-forge/noarch/zipp-3.23.0-pyhcf101f3_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/zlib-ng-2.3.3-ha7cb516_1.conda -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/zstd-1.5.7-h85ac4a6_6.conda -packages: -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/_openmp_mutex-4.5-20_gnu.conda -build_number: 20 -sha256: 
a2527b1d81792a0ccd2c05850960df119c2b6d8f5fdec97f2db7d25dc23b1068 -md5: 468fd3bb9e1f671d36c2cbc677e56f1d -depends: -- libgomp >=7.5.0 -constrains: -- openmp_impl <0.0a0 -license: BSD-3-Clause -license_family: BSD -size: 28926 -timestamp: 1770939656741 -- conda: https://conda.anaconda.org/conda-forge/noarch/_python_abi3_support-1.0-hd8ed1ab_2.conda -sha256: a3967b937b9abf0f2a99f3173fa4630293979bd1644709d89580e7c62a544661 -md5: aaa2a381ccc56eac91d63b6c1240312f -depends: -- cpython -- python-gil -license: MIT -license_family: MIT -size: 8191 -timestamp: 1744137672556 -- conda: https://conda.anaconda.org/conda-forge/noarch/annotated-types-0.7.0-pyhd8ed1ab_1.conda -sha256: e0ea1ba78fbb64f17062601edda82097fcf815012cf52bb704150a2668110d48 -md5: 2934f256a8acfe48f6ebb4fce6cde29c -depends: -- python >=3.9 -- typing-extensions >=4.0.0 -license: MIT -license_family: MIT -size: 18074 -timestamp: 1733247158254 -- conda: https://conda.anaconda.org/conda-forge/noarch/attrs-26.1.0-pyhcf101f3_0.conda -sha256: 1b6124230bb4e571b1b9401537ecff575b7b109cc3a21ee019f65e083b8399ab -md5: c6b0543676ecb1fb2d7643941fe375f2 -depends: -- python >=3.10 -- python -license: MIT -license_family: MIT -size: 64927 -timestamp: 1773935801332 -- conda: https://conda.anaconda.org/conda-forge/noarch/backports.zstd-1.3.0-py314h680f03e_0.conda -noarch: generic -sha256: c31ab719d256bc6f89926131e88ecd0f0c5d003fe8481852c6424f4ec6c7eb29 -md5: a2ac7763a9ac75055b68f325d3255265 -depends: -- python >=3.14 -license: BSD-3-Clause AND MIT AND EPL-2.0 -size: 7514 -timestamp: 1767044983590 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/brotli-python-1.2.0-py314h352cb57_1.conda -sha256: 5a5b0cdcd7ed89c6a8fb830924967f6314a2b71944bc1ebc2c105781ba97aa75 -md5: a1b5c571a0923a205d663d8678df4792 -depends: -- libgcc >=14 -- libstdcxx >=14 -- python >=3.14,<3.15.0a0 -- python >=3.14,<3.15.0a0 *_cp314 -- python_abi 3.14.* *_cp314 -constrains: -- libbrotlicommon 1.2.0 he30d5cf_1 -license: MIT -license_family: MIT 
-size: 373193 -timestamp: 1764017486851 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/bzip2-1.0.8-h4777abc_9.conda -sha256: b3495077889dde6bb370938e7db82be545c73e8589696ad0843a32221520ad4c -md5: 840d8fc0d7b3209be93080bc20e07f2d -depends: -- libgcc >=14 -license: bzip2-1.0.6 -license_family: BSD -size: 192412 -timestamp: 1771350241232 -- conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2026.2.25-hbd8a1cb_0.conda -sha256: 67cc7101b36421c5913a1687ef1b99f85b5d6868da3abbf6ec1a4181e79782fc -md5: 4492fd26db29495f0ba23f146cd5638d -depends: -- __unix -license: ISC -size: 147413 -timestamp: 1772006283803 -- conda: https://conda.anaconda.org/conda-forge/noarch/certifi-2026.2.25-pyhd8ed1ab_0.conda -sha256: a6b118fd1ed6099dc4fc03f9c492b88882a780fadaef4ed4f93dc70757713656 -md5: 765c4d97e877cdbbb88ff33152b86125 -depends: -- python >=3.10 -license: ISC -size: 151445 -timestamp: 1772001170301 -- conda: https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.4.6-pyhd8ed1ab_0.conda -sha256: d86dfd428b2e3c364fa90e07437c8405d635aa4ef54b25ab51d9c712be4112a5 -md5: 49ee13eb9b8f44d63879c69b8a40a74b -depends: -- python >=3.10 -license: MIT -license_family: MIT -size: 58510 -timestamp: 1773660086450 -- conda: https://conda.anaconda.org/conda-forge/noarch/click-8.3.1-pyh8f84b5b_1.conda -sha256: 38cfe1ee75b21a8361c8824f5544c3866f303af1762693a178266d7f198e8715 -md5: ea8a6c3256897cc31263de9f455e25d9 -depends: -- python >=3.10 -- __unix -- python -license: BSD-3-Clause -license_family: BSD -size: 97676 -timestamp: 1764518652276 -- conda: https://conda.anaconda.org/conda-forge/noarch/coloredlogs-15.0.1-pyhd8ed1ab_4.conda -sha256: 8021c76eeadbdd5784b881b165242db9449783e12ce26d6234060026fd6a8680 -md5: b866ff7007b934d564961066c8195983 -depends: -- humanfriendly >=9.1 -- python >=3.9 -license: MIT -license_family: MIT -size: 43758 -timestamp: 1733928076798 -- conda: https://conda.anaconda.org/conda-forge/noarch/colormath-3.0.0-pyhd8ed1ab_4.conda -sha256: 
59c9e29800b483b390467f90e82b0da3a4fbf0612efe1c90813fca232780e160 -md5: 071cf7b0ce333c81718b054066c15102 -depends: -- networkx >=2.0 -- numpy -- python >=3.9 -license: BSD-3-Clause -license_family: BSD -size: 39326 -timestamp: 1735759976140 -- conda: https://conda.anaconda.org/conda-forge/noarch/cpython-3.14.3-py314hd8ed1ab_101.conda -noarch: generic -sha256: 91b06300879df746214f7363d6c27c2489c80732e46a369eb2afc234bcafb44c -md5: 3bb89e4f795e5414addaa531d6b1500a -depends: -- python >=3.14,<3.15.0a0 -- python_abi * *_cp314 -license: Python-2.0 -size: 50078 -timestamp: 1770674447292 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/expat-2.7.4-hfae3067_0.conda -sha256: 5f087bef054c681edcaae84a8c2230585b938691e371ff92957a30707b7fcdf7 -md5: b304307db639831ad7caabd2eac6fca6 -depends: -- libexpat 2.7.4 hfae3067_0 -- libgcc >=14 -license: MIT -license_family: MIT -size: 137701 -timestamp: 1771259543650 -- conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2 -sha256: 58d7f40d2940dd0a8aa28651239adbf5613254df0f75789919c4e6762054403b -md5: 0c96522c6bdaed4b1566d11387caaf45 -license: BSD-3-Clause -license_family: BSD -size: 397370 -timestamp: 1566932522327 -- conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2 -sha256: c52a29fdac682c20d252facc50f01e7c2e7ceac52aa9817aaf0bb83f7559ec5c -md5: 34893075a5c9e55cdafac56607368fc6 -license: OFL-1.1 -license_family: Other -size: 96530 -timestamp: 1620479909603 -- conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2 -sha256: 00925c8c055a2275614b4d983e1df637245e19058d79fc7dd1a93b8d9fb4b139 -md5: 4d59c254e01d9cde7957100457e2d5fb -license: OFL-1.1 -license_family: Other -size: 700814 -timestamp: 1620479612257 -- conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_3.conda -sha256: 2821ec1dc454bd8b9a31d0ed22a7ce22422c0aef163c59f49dfdf915d0f0ca14 -md5: 
49023d73832ef61042f6a237cb2687e7 -license: LicenseRef-Ubuntu-Font-Licence-Version-1.0 -license_family: Other -size: 1620504 -timestamp: 1727511233259 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fontconfig-2.17.1-hba86a56_0.conda -sha256: 835aff8615dd8d8fff377679710ce81b8a2c47b6404e21a92fb349fda193a15c -md5: 0fed1ff55f4938a65907f3ecf62609db -depends: -- libexpat >=2.7.4,<3.0a0 -- libfreetype >=2.14.1 -- libfreetype6 >=2.14.1 -- libgcc >=14 -- libuuid >=2.41.3,<3.0a0 -- libzlib >=1.3.1,<2.0a0 -license: MIT -license_family: MIT -size: 279044 -timestamp: 1771382728182 -- conda: https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-hc364b38_1.conda -sha256: 54eea8469786bc2291cc40bca5f46438d3e062a399e8f53f013b6a9f50e98333 -md5: a7970cd949a077b7cb9696379d338681 -depends: -- font-ttf-ubuntu -- font-ttf-inconsolata -- font-ttf-dejavu-sans-mono -- font-ttf-source-code-pro -license: BSD-3-Clause -license_family: BSD -size: 4059 -timestamp: 1762351264405 -- conda: https://conda.anaconda.org/conda-forge/noarch/h2-4.3.0-pyhcf101f3_0.conda -sha256: 84c64443368f84b600bfecc529a1194a3b14c3656ee2e832d15a20e0329b6da3 -md5: 164fc43f0b53b6e3a7bc7dce5e4f1dc9 -depends: -- python >=3.10 -- hyperframe >=6.1,<7 -- hpack >=4.1,<5 -- python -license: MIT -license_family: MIT -size: 95967 -timestamp: 1756364871835 -- conda: https://conda.anaconda.org/conda-forge/noarch/hpack-4.1.0-pyhd8ed1ab_0.conda -sha256: 6ad78a180576c706aabeb5b4c8ceb97c0cb25f1e112d76495bff23e3779948ba -md5: 0a802cb9888dd14eeefc611f05c40b6e -depends: -- python >=3.9 -license: MIT -license_family: MIT -size: 30731 -timestamp: 1737618390337 -- conda: https://conda.anaconda.org/conda-forge/noarch/humanfriendly-10.0-pyh707e725_8.conda -sha256: fa2071da7fab758c669e78227e6094f6b3608228740808a6de5d6bce83d9e52d -md5: 7fe569c10905402ed47024fc481bb371 -depends: -- __unix -- python >=3.9 -license: MIT -license_family: MIT -size: 73563 -timestamp: 1733928021866 -- conda: 
https://conda.anaconda.org/conda-forge/noarch/humanize-4.15.0-pyhd8ed1ab_0.conda -sha256: 6c4343b376d0b12a4c75ab992640970d36c933cad1fd924f6a1181fa91710e80 -md5: daddf757c3ecd6067b9af1df1f25d89e -depends: -- python >=3.10 -license: MIT -license_family: MIT -size: 67994 -timestamp: 1766267728652 -- conda: https://conda.anaconda.org/conda-forge/noarch/hyperframe-6.1.0-pyhd8ed1ab_0.conda -sha256: 77af6f5fe8b62ca07d09ac60127a30d9069fdc3c68d6b256754d0ffb1f7779f8 -md5: 8e6923fc12f1fe8f8c4e5c9f343256ac -depends: -- python >=3.9 -license: MIT -license_family: MIT -size: 17397 -timestamp: 1737618427549 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/icu-78.3-hcab7f73_0.conda -sha256: 49ba6aed2c6b482bb0ba41078057555d29764299bc947b990708617712ef6406 -md5: 546da38c2fa9efacf203e2ad3f987c59 -depends: -- libgcc >=14 -- libstdcxx >=14 -license: MIT -license_family: MIT -size: 12837286 -timestamp: 1773822650615 -- conda: https://conda.anaconda.org/conda-forge/noarch/idna-3.11-pyhd8ed1ab_0.conda -sha256: ae89d0299ada2a3162c2614a9d26557a92aa6a77120ce142f8e0109bbf0342b0 -md5: 53abe63df7e10a6ba605dc5f9f961d36 -depends: -- python >=3.10 -license: BSD-3-Clause -license_family: BSD -size: 50721 -timestamp: 1760286526795 -- conda: https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-8.8.0-pyhcf101f3_0.conda -sha256: 82ab2a0d91ca1e7e63ab6a4939356667ef683905dea631bc2121aa534d347b16 -md5: 080594bf4493e6bae2607e65390c520a -depends: -- python >=3.10 -- zipp >=3.20 -- python -license: Apache-2.0 -license_family: APACHE -size: 34387 -timestamp: 1773931568510 -- conda: https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.6-pyhcf101f3_1.conda -sha256: fc9ca7348a4f25fed2079f2153ecdcf5f9cf2a0bc36c4172420ca09e1849df7b -md5: 04558c96691bed63104678757beb4f8d -depends: -- markupsafe >=2.0 -- python >=3.10 -- python -license: BSD-3-Clause -license_family: BSD -size: 120685 -timestamp: 1764517220861 -- conda: 
https://conda.anaconda.org/conda-forge/noarch/jsonschema-4.26.0-pyhcf101f3_0.conda -sha256: db973a37d75db8e19b5f44bbbdaead0c68dde745407f281e2a7fe4db74ec51d7 -md5: ada41c863af263cc4c5fcbaff7c3e4dc -depends: -- attrs >=22.2.0 -- jsonschema-specifications >=2023.3.6 -- python >=3.10 -- referencing >=0.28.4 -- rpds-py >=0.25.0 -- python -license: MIT -license_family: MIT -size: 82356 -timestamp: 1767839954256 -- conda: https://conda.anaconda.org/conda-forge/noarch/jsonschema-specifications-2025.9.1-pyhcf101f3_0.conda -sha256: 0a4f3b132f0faca10c89fdf3b60e15abb62ded6fa80aebfc007d05965192aa04 -md5: 439cd0f567d697b20a8f45cb70a1005a -depends: -- python >=3.10 -- referencing >=0.31.0 -- python -license: MIT -license_family: MIT -size: 19236 -timestamp: 1757335715225 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/kaleido-core-0.2.1-he5a581e_0.tar.bz2 -sha256: d3c7f4797566e6f983d16c2a87063a18e4b2d819a66230190a21584d70042755 -md5: 4f0d284f5d11e04277b552eb1c172c7f -depends: -- __glibc >=2.17,<3.0.a0 -- expat >=2.2.10,<3.0.0a0 -- fontconfig -- fonts-conda-forge -- libgcc-ng >=9.3.0 -- mathjax 2.7.* -- nspr >=4.29,<5.0a0 -- nss >=3.62,<4.0a0 -- sqlite >=3.34.0,<4.0a0 -license: MIT -license_family: MIT -size: 65750397 -timestamp: 1615199465742 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/lcms2-2.18-h9d5b58d_0.conda -sha256: 379ef5e91a587137391a6149755d0e929f1a007d2dcb211318ac670a46c8596f -md5: bb960f01525b5e001608afef9d47b79c -depends: -- libgcc >=14 -- libjpeg-turbo >=3.1.2,<4.0a0 -- libtiff >=4.7.1,<4.8.0a0 -license: MIT -license_family: MIT -size: 293039 -timestamp: 1768184778398 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/ld_impl_linux-aarch64-2.45.1-default_h1979696_102.conda -sha256: 7abd913d81a9bf00abb699e8987966baa2065f5132e37e815f92d90fc6bba530 -md5: a21644fc4a83da26452a718dc9468d5f -depends: -- zstd >=1.5.7,<1.6.0a0 -constrains: -- binutils_impl_linux-aarch64 2.45.1 -license: GPL-3.0-only -license_family: GPL -size: 
875596 -timestamp: 1774197520746 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/lerc-4.1.0-h52b7260_0.conda -sha256: 8957fd460c1c132c8031f65fd5f56ec3807fd71b7cab2c5e2b0937b13404ab36 -md5: d13423b06447113a90b5b1366d4da171 -depends: -- libgcc >=14 -- libstdcxx >=14 -license: Apache-2.0 -license_family: Apache -size: 240444 -timestamp: 1773114901155 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libblas-3.11.0-5_haddc8a3_openblas.conda -build_number: 5 -sha256: 700f3c03d0fba8e687a345404a45fbabe781c1cf92242382f62cef2948745ec4 -md5: 5afcea37a46f76ec1322943b3c4dfdc0 -depends: -- libopenblas >=0.3.30,<0.3.31.0a0 -- libopenblas >=0.3.30,<1.0a0 -constrains: -- mkl <2026 -- libcblas 3.11.0 5*_openblas -- liblapack 3.11.0 5*_openblas -- liblapacke 3.11.0 5*_openblas -- blas 2.305 openblas -license: BSD-3-Clause -license_family: BSD -size: 18369 -timestamp: 1765818610617 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcblas-3.11.0-5_hd72aa62_openblas.conda -build_number: 5 -sha256: 3fad5c9de161dccb4e42c8b1ae8eccb33f4ed56bccbcced9cbb0956ae7869e61 -md5: 0b2f1143ae2d0aa4c991959d0daaf256 -depends: -- libblas 3.11.0 5_haddc8a3_openblas -constrains: -- liblapack 3.11.0 5*_openblas -- liblapacke 3.11.0 5*_openblas -- blas 2.305 openblas -license: BSD-3-Clause -license_family: BSD -size: 18371 -timestamp: 1765818618899 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libdeflate-1.25-h1af38f5_0.conda -sha256: 48814b73bd462da6eed2e697e30c060ae16af21e9fbed30d64feaf0aad9da392 -md5: a9138815598fe6b91a1d6782ca657b0c -depends: -- libgcc >=14 -license: MIT -license_family: MIT -size: 71117 -timestamp: 1761979776756 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libexpat-2.7.4-hfae3067_0.conda -sha256: 995ce3ad96d0f4b5ed6296b051a0d7b6377718f325bc0e792fbb96b0e369dad7 -md5: 57f3b3da02a50a1be2a6fe847515417d -depends: -- libgcc >=14 -constrains: -- expat 2.7.4.* -license: MIT -license_family: MIT -size: 76564 
-timestamp: 1771259530958 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libffi-3.5.2-h376a255_0.conda -sha256: 3df4c539449aabc3443bbe8c492c01d401eea894603087fca2917aa4e1c2dea9 -md5: 2f364feefb6a7c00423e80dcb12db62a -depends: -- libgcc >=14 -license: MIT -license_family: MIT -size: 55952 -timestamp: 1769456078358 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libfreetype-2.14.3-h8af1aa0_0.conda -sha256: 752e4f66283d7deb4c6fd47d88df644d8daa2aaa825a54f3bf350a625190192a -md5: a229e22d4d8814a07702b0919d8e6701 -depends: -- libfreetype6 >=2.14.3 -license: GPL-2.0-only OR FTL -size: 8125 -timestamp: 1774301094057 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libfreetype6-2.14.3-hdae7a39_0.conda -sha256: 8e6b27fe4eec4c2fa7b7769a21973734c8dba1de80086fb0213e58375ac09f4c -md5: b99ed99e42dafb27889483b3098cace7 -depends: -- libgcc >=14 -- libpng >=1.6.55,<1.7.0a0 -- libzlib >=1.3.2,<2.0a0 -constrains: -- freetype >=2.14.3 -license: GPL-2.0-only OR FTL -size: 422941 -timestamp: 1774301093473 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-15.2.0-h8acb6b2_18.conda -sha256: 43df385bedc1cab11993c4369e1f3b04b4ca5d0ea16cba6a0e7f18dbc129fcc9 -md5: 552567ea2b61e3a3035759b2fdb3f9a6 -depends: -- _openmp_mutex >=4.5 -constrains: -- libgcc-ng ==15.2.0=*_18 -- libgomp 15.2.0 h8acb6b2_18 -license: GPL-3.0-only WITH GCC-exception-3.1 -license_family: GPL -size: 622900 -timestamp: 1771378128706 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-ng-15.2.0-he9431aa_18.conda -sha256: 83bb0415f59634dccfa8335d4163d1f6db00a27b36666736f9842b650b92cf2f -md5: 4feebd0fbf61075a1a9c2e9b3936c257 -depends: -- libgcc 15.2.0 h8acb6b2_18 -license: GPL-3.0-only WITH GCC-exception-3.1 -license_family: GPL -size: 27568 -timestamp: 1771378136019 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgfortran-15.2.0-he9431aa_18.conda -sha256: 7dcd7dff2505d56fd5272a6e712ec912f50a46bf07dc6873a7e853694304e6e4 -md5: 
41f261f5e4e2e8cbd236c2f1f15dae1b -depends: -- libgfortran5 15.2.0 h1b7bec0_18 -constrains: -- libgfortran-ng ==15.2.0=*_18 -license: GPL-3.0-only WITH GCC-exception-3.1 -license_family: GPL -size: 27587 -timestamp: 1771378169244 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgfortran5-15.2.0-h1b7bec0_18.conda -sha256: 85347670dfb4a8d4c13cd7cae54138dcf2b1606b6bede42eef5507bf5f9660c6 -md5: 574d88ce3348331e962cfa5ed451b247 -depends: -- libgcc >=15.2.0 -constrains: -- libgfortran 15.2.0 -license: GPL-3.0-only WITH GCC-exception-3.1 -license_family: GPL -size: 1486341 -timestamp: 1771378148102 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgomp-15.2.0-h8acb6b2_18.conda -sha256: fc716f11a6a8525e27a5d332ef6a689210b0d2a4dd1133edc0f530659aa9faa6 -md5: 4faa39bf919939602e594253bd673958 -license: GPL-3.0-only WITH GCC-exception-3.1 -license_family: GPL -size: 588060 -timestamp: 1771378040807 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libjpeg-turbo-3.1.2-he30d5cf_0.conda -sha256: 84064c7c53a64291a585d7215fe95ec42df74203a5bf7615d33d49a3b0f08bb6 -md5: 5109d7f837a3dfdf5c60f60e311b041f -depends: -- libgcc >=14 -constrains: -- jpeg <0.0.0a -license: IJG AND BSD-3-Clause AND Zlib -size: 691818 -timestamp: 1762094728337 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/liblapack-3.11.0-5_h88aeb00_openblas.conda -build_number: 5 -sha256: 692222d186d3ffbc99eaf04b5b20181fd26aee1edec1106435a0a755c57cce86 -md5: 88d1e4133d1182522b403e9ba7435f04 -depends: -- libblas 3.11.0 5_haddc8a3_openblas -constrains: -- liblapacke 3.11.0 5*_openblas -- blas 2.305 openblas -- libcblas 3.11.0 5*_openblas -license: BSD-3-Clause -license_family: BSD -size: 18392 -timestamp: 1765818627104 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/liblzma-5.8.2-he30d5cf_0.conda -sha256: 843c46e20519651a3e357a8928352b16c5b94f4cd3d5481acc48be2e93e8f6a3 -md5: 96944e3c92386a12755b94619bae0b35 -depends: -- libgcc >=14 -constrains: -- xz 
5.8.2.* -license: 0BSD -size: 125916 -timestamp: 1768754941722 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libmpdec-4.0.0-he30d5cf_1.conda -sha256: 57c0dd12d506e84541c4e877898bd2a59cca141df493d34036f18b2751e0a453 -md5: 7b9813e885482e3ccb1fa212b86d7fd0 -depends: -- libgcc >=14 -license: BSD-2-Clause -license_family: BSD -size: 114056 -timestamp: 1769482343003 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libopenblas-0.3.30-pthreads_h9d3fd7e_4.conda -sha256: 794a7270ea049ec931537874cd8d2de0ef4b3cef71c055cfd8b4be6d2f4228b0 -md5: 11d7d57b7bdd01da745bbf2b67020b2e -depends: -- libgcc >=14 -- libgfortran -- libgfortran5 >=14.3.0 -constrains: -- openblas >=0.3.30,<0.3.31.0a0 -license: BSD-3-Clause -license_family: BSD -size: 4959359 -timestamp: 1763114173544 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libpng-1.6.55-h1abf092_0.conda -sha256: c7378c6b79de4d571d00ad1caf0a4c19d43c9c94077a761abb6ead44d891f907 -md5: be4088903b94ea297975689b3c3aeb27 -depends: -- libgcc >=14 -- libzlib >=1.3.1,<2.0a0 -license: zlib-acknowledgement -size: 340156 -timestamp: 1770691477245 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libsqlite-3.52.0-h10b116e_0.conda -sha256: 1ddaf91b44fae83856276f4cb7ce544ffe41d4b55c1e346b504c6b45f19098d6 -md5: 77891484f18eca74b8ad83694da9815e -depends: -- icu >=78.2,<79.0a0 -- libgcc >=14 -- libzlib >=1.3.1,<2.0a0 -license: blessing -size: 952296 -timestamp: 1772818881550 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libstdcxx-15.2.0-hef695bb_18.conda -sha256: 31fdb9ffafad106a213192d8319b9f810e05abca9c5436b60e507afb35a6bc40 -md5: f56573d05e3b735cb03efeb64a15f388 -depends: -- libgcc 15.2.0 h8acb6b2_18 -constrains: -- libstdcxx-ng ==15.2.0=*_18 -license: GPL-3.0-only WITH GCC-exception-3.1 -license_family: GPL -size: 5541411 -timestamp: 1771378162499 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libtiff-4.7.1-hdb009f0_1.conda -sha256: 
7ff79470db39e803e21b8185bc8f19c460666d5557b1378d1b1e857d929c6b39 -md5: 8c6fd84f9c87ac00636007c6131e457d -depends: -- lerc >=4.0.0,<5.0a0 -- libdeflate >=1.25,<1.26.0a0 -- libgcc >=14 -- libjpeg-turbo >=3.1.0,<4.0a0 -- liblzma >=5.8.1,<6.0a0 -- libstdcxx >=14 -- libwebp-base >=1.6.0,<2.0a0 -- libzlib >=1.3.1,<2.0a0 -- zstd >=1.5.7,<1.6.0a0 -license: HPND -size: 488407 -timestamp: 1762022048105 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libuuid-2.41.3-h1022ec0_0.conda -sha256: c37a8e89b700646f3252608f8368e7eb8e2a44886b92776e57ad7601fc402a11 -md5: cf2861212053d05f27ec49c3784ff8bb -depends: -- libgcc >=14 -license: BSD-3-Clause -license_family: BSD -size: 43453 -timestamp: 1766271546875 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libwebp-base-1.6.0-ha2e29f5_0.conda -sha256: b03700a1f741554e8e5712f9b06dd67e76f5301292958cd3cb1ac8c6fdd9ed25 -md5: 24e92d0942c799db387f5c9d7b81f1af -depends: -- libgcc >=14 -constrains: -- libwebp 1.6.0 -license: BSD-3-Clause -license_family: BSD -size: 359496 -timestamp: 1752160685488 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libxcb-1.17.0-h262b8f6_0.conda -sha256: 461cab3d5650ac6db73a367de5c8eca50363966e862dcf60181d693236b1ae7b -md5: cd14ee5cca2464a425b1dbfc24d90db2 -depends: -- libgcc >=13 -- pthread-stubs -- xorg-libxau >=1.0.11,<2.0a0 -- xorg-libxdmcp -license: MIT -license_family: MIT -size: 397493 -timestamp: 1727280745441 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libzlib-1.3.2-hdc9db2a_2.conda -sha256: eb111e32e5a7313a5bf799c7fb2419051fa2fe7eff74769fac8d5a448b309f7f -md5: 502006882cf5461adced436e410046d1 -constrains: -- zlib 1.3.2 *_2 -license: Zlib -license_family: Other -size: 69833 -timestamp: 1774072605429 -- conda: https://conda.anaconda.org/conda-forge/noarch/markdown-3.10.2-pyhcf101f3_0.conda -sha256: 20e0892592a3e7c683e3d66df704a9425d731486a97c34fc56af4da1106b2b6b -md5: ba0a9221ce1063f31692c07370d062f3 -depends: -- importlib-metadata >=4.4 -- 
python >=3.10 -- python -license: BSD-3-Clause -license_family: BSD -size: 85893 -timestamp: 1770694658918 -- conda: https://conda.anaconda.org/conda-forge/noarch/markdown-it-py-4.0.0-pyhd8ed1ab_0.conda -sha256: 7b1da4b5c40385791dbc3cc85ceea9fad5da680a27d5d3cb8bfaa185e304a89e -md5: 5b5203189eb668f042ac2b0826244964 -depends: -- mdurl >=0.1,<1 -- python >=3.10 -license: MIT -license_family: MIT -size: 64736 -timestamp: 1754951288511 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/markupsafe-3.0.3-py314hb76de3f_1.conda -sha256: 383c188496d13a55658c06e61e7d4cdff2c9f9d5a0648769fca8250bece7e0ef -md5: e5de3c36dd548b35ff2a8aa49208dcb3 -depends: -- libgcc >=14 -- python >=3.14,<3.15.0a0 -- python_abi 3.14.* *_cp314 -constrains: -- jinja2 >=3.0.0 -license: BSD-3-Clause -license_family: BSD -size: 27913 -timestamp: 1772446407659 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/mathjax-2.7.7-h8af1aa0_3.tar.bz2 -sha256: 8fd4c79d6eda3d4cba73783114305a53a154ada4d1e334d4e02cb3521429599b -md5: 7b08314a6867a9d5648a1c3265e9eb8e -license: Apache-2.0 -license_family: Apache -size: 22257008 -timestamp: 1662784555011 -- conda: https://conda.anaconda.org/conda-forge/noarch/mdurl-0.1.2-pyhd8ed1ab_1.conda -sha256: 78c1bbe1723449c52b7a9df1af2ee5f005209f67e40b6e1d3c7619127c43b1c7 -md5: 592132998493b3ff25fd7479396e8351 -depends: -- python >=3.9 -license: MIT -license_family: MIT -size: 14465 -timestamp: 1733255681319 -- conda: https://conda.anaconda.org/bioconda/noarch/multiqc-1.33-pyhdfd78af_0.conda -sha256: f005760b13093362fc9c997d603dd487de32ab2e821a3cbce52a42bcb8136517 -md5: 698a8a27c2b9d8a542c70cb47099a75e -depends: -- click -- coloredlogs -- humanize -- importlib-metadata -- jinja2 >=3.0.0 -- jsonschema -- markdown -- natsort -- numpy -- packaging -- pillow >=10.2.0 -- plotly >=5.18 -- polars-lts-cpu -- pyaml-env -- pydantic >=2.7.1 -- python >=3.8,!=3.14.1 -- python-dotenv -- python-kaleido 0.2.1 -- pyyaml >=4 -- requests -- rich >=10 -- rich-click -- 
spectra >=0.0.10 -- tiktoken -- tqdm -- typeguard -license: GPL-3.0-or-later -license_family: GPL3 -size: 4198799 -timestamp: 1765300743879 -- conda: https://conda.anaconda.org/conda-forge/noarch/narwhals-2.18.1-pyhcf101f3_1.conda -sha256: 541fd4390a0687228b8578247f1536a821d9261389a65585af9d1a6f2a14e1e0 -md5: 30bec5e8f4c3969e2b1bd407c5e52afb -depends: -- python >=3.10 -- python -license: MIT -size: 280459 -timestamp: 1774380620329 -- conda: https://conda.anaconda.org/conda-forge/noarch/natsort-8.4.0-pyhcf101f3_2.conda -sha256: aeb1548eb72e4f198e72f19d242fb695b35add2ac7b2c00e0d83687052867680 -md5: e941e85e273121222580723010bd4fa2 -depends: -- python >=3.9 -- python -license: MIT -license_family: MIT -size: 39262 -timestamp: 1770905275632 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/ncurses-6.5-ha32ae93_3.conda -sha256: 91cfb655a68b0353b2833521dc919188db3d8a7f4c64bea2c6a7557b24747468 -md5: 182afabe009dc78d8b73100255ee6868 -depends: -- libgcc >=13 -license: X11 AND BSD-3-Clause -size: 926034 -timestamp: 1738196018799 -- conda: https://conda.anaconda.org/conda-forge/noarch/networkx-3.6.1-pyhcf101f3_0.conda -sha256: f6a82172afc50e54741f6f84527ef10424326611503c64e359e25a19a8e4c1c6 -md5: a2c1eeadae7a309daed9d62c96012a2b -depends: -- python >=3.11 -- python -constrains: -- numpy >=1.25 -- scipy >=1.11.2 -- matplotlib-base >=3.8 -- pandas >=2.0 -license: BSD-3-Clause -license_family: BSD -size: 1587439 -timestamp: 1765215107045 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/nspr-4.38-h3ad9384_0.conda -sha256: 78a06e89285fef242e272998b292c1e621e3ee3dd4fba62ec014e503c7ec118f -md5: 6dd4f07147774bf720075a210f8026b9 -depends: -- libgcc >=14 -- libstdcxx >=14 -license: MPL-2.0 -license_family: MOZILLA -size: 235140 -timestamp: 1762350120355 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/nss-3.118-h544fa81_0.conda -sha256: 48942696889367ffd448f8dccfc080fb7e130b9938a4a3b6b20ef8e6af856463 -md5: 4540f9570d12db2150f42ba036154552 
-depends: -- libgcc >=14 -- libsqlite >=3.51.0,<4.0a0 -- libstdcxx >=14 -- libzlib >=1.3.1,<2.0a0 -- nspr >=4.38,<5.0a0 -license: MPL-2.0 -license_family: MOZILLA -size: 2061869 -timestamp: 1763490303490 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/numpy-2.4.3-py314haac167e_0.conda -sha256: a6d42fd88afc57c3b0a57b21a12eff7492dfc419bb61ee3f74e9ba6261dabc88 -md5: 25d896c331481145720a21e5145fad65 -depends: -- python -- libgcc >=14 -- python 3.14.* *_cp314 -- libstdcxx >=14 -- libcblas >=3.9.0,<4.0a0 -- liblapack >=3.9.0,<4.0a0 -- python_abi 3.14.* *_cp314 -- libblas >=3.9.0,<4.0a0 -constrains: -- numpy-base <0a0 -license: BSD-3-Clause -license_family: BSD -size: 8008045 -timestamp: 1773839355275 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/openjpeg-2.5.4-h5da879a_0.conda -sha256: bd1bc8bdde5e6c5cbac42d462b939694e40b59be6d0698f668515908640c77b8 -md5: cea962410e327262346d48d01f05936c -depends: -- libgcc >=14 -- libpng >=1.6.50,<1.7.0a0 -- libstdcxx >=14 -- libtiff >=4.7.1,<4.8.0a0 -- libzlib >=1.3.1,<2.0a0 -license: BSD-2-Clause -license_family: BSD -size: 392636 -timestamp: 1758489353577 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/openssl-3.6.1-h546c87b_1.conda -sha256: 7f8048c0e75b2620254218d72b4ae7f14136f1981c5eb555ef61645a9344505f -md5: 25f5885f11e8b1f075bccf4a2da91c60 -depends: -- ca-certificates -- libgcc >=14 -license: Apache-2.0 -license_family: Apache -size: 3692030 -timestamp: 1769557678657 -- conda: https://conda.anaconda.org/conda-forge/noarch/packaging-26.0-pyhcf101f3_0.conda -sha256: c1fc0f953048f743385d31c468b4a678b3ad20caffdeaa94bed85ba63049fd58 -md5: b76541e68fea4d511b1ac46a28dcd2c6 -depends: -- python >=3.8 -- python -license: Apache-2.0 -license_family: APACHE -size: 72010 -timestamp: 1769093650580 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/pillow-12.1.1-py314hac3e5ec_0.conda -sha256: 1ca2d1616baad9bccb7ebc425ef2dcd6cebe742fbe91edf226fb606ad371ca0f -md5: 
d3c959c7efe560b2d7da459d69121fe9 -depends: -- python -- python 3.14.* *_cp314 -- libgcc >=14 -- zlib-ng >=2.3.3,<2.4.0a0 -- libwebp-base >=1.6.0,<2.0a0 -- tk >=8.6.13,<8.7.0a0 -- libfreetype >=2.14.1 -- libfreetype6 >=2.14.1 -- libtiff >=4.7.1,<4.8.0a0 -- lcms2 >=2.18,<3.0a0 -- python_abi 3.14.* *_cp314 -- openjpeg >=2.5.4,<3.0a0 -- libjpeg-turbo >=3.1.2,<4.0a0 -- libxcb >=1.17.0,<2.0a0 -license: HPND -size: 1051828 -timestamp: 1770794010335 -- conda: https://conda.anaconda.org/conda-forge/noarch/plotly-6.6.0-pyhd8ed1ab_0.conda -sha256: c418d325359fc7a0074cea7f081ef1bce26e114d2da8a0154c5d27ecc87a08e7 -md5: 3e9427ee186846052e81fadde8ebe96a -depends: -- narwhals >=1.15.1 -- packaging -- python >=3.10 -constrains: -- ipywidgets >=7.6 -license: MIT -license_family: MIT -size: 5251872 -timestamp: 1772628857717 -- conda: https://conda.anaconda.org/conda-forge/noarch/polars-1.39.3-pyh58ad624_1.conda -sha256: d332c2d5002fc440ae37ed9679ffc21b552f18d20232390005d1dd3bce0888d3 -md5: d5a4e013a30dd8dfde9ab39f45aaf9c1 -depends: -- polars-runtime-32 ==1.39.3 -- python >=3.10 -- python -constrains: -- numpy >=1.16.0 -- pyarrow >=7.0.0 -- fastexcel >=0.9 -- openpyxl >=3.0.0 -- xlsx2csv >=0.8.0 -- connectorx >=0.3.2 -- deltalake >=1.0.0 -- pyiceberg >=0.7.1 -- altair >=5.4.0 -- great_tables >=0.8.0 -- polars-runtime-32 ==1.39.3 -- polars-runtime-64 ==1.39.3 -- polars-runtime-compat ==1.39.3 -license: MIT -license_family: MIT -size: 533495 -timestamp: 1774207987966 -- conda: https://conda.anaconda.org/conda-forge/noarch/polars-lts-cpu-1.34.0.deprecated-hc364b38_0.conda -sha256: e466fb31f67ba9bde18deafeb34263ca5eb25807f39ead0e9d753a8e82c4c4f4 -md5: ef0340e75068ac8ff96462749b5c98e7 -depends: -- polars >=1.34.0 -- polars-runtime-compat >=1.34.0 -license: MIT -license_family: MIT -size: 3902 -timestamp: 1760206808444 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/polars-runtime-32-1.39.3-py310hff09b76_1.conda -noarch: python -sha256: 
c070be507c5a90df397a47ae0299660be437d5546d68f1bc0fa4402c9f07d59e -md5: 3c1a7c6b4ba8b9fb773ace9723f8a5db -depends: -- python -- libgcc >=14 -- libstdcxx >=14 -- _python_abi3_support 1.* -- cpython >=3.10 -constrains: -- __glibc >=2.17 -license: MIT -license_family: MIT -size: 34785466 -timestamp: 1774207998285 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/polars-runtime-compat-1.39.3-py310hf00a4a2_1.conda -noarch: python -sha256: 683315f1a49e47ce72bf9462419733b40b588b2b3106552d95fd4cd994e174de -md5: dd3464e2132dc3a783e76e5078870c76 -depends: -- python -- libgcc >=14 -- libstdcxx >=14 -- _python_abi3_support 1.* -- cpython >=3.10 -constrains: -- __glibc >=2.17 -license: MIT -license_family: MIT -size: 34652491 -timestamp: 1774207996879 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/procps-ng-4.0.6-h1779866_0.conda -sha256: e9cbcbc94e151ada3d6dc365380aaaf591f65012c16d9a2abaea4b9b90adc402 -md5: ab7288cc39545556d1bc5e71ab2df9a9 -depends: -- libgcc >=14 -- ncurses >=6.5,<7.0a0 -license: GPL-2.0-or-later AND LGPL-2.0-or-later -license_family: GPL -size: 636733 -timestamp: 1769712412683 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/pthread-stubs-0.4-h86ecc28_1002.conda -sha256: 977dfb0cb3935d748521dd80262fe7169ab82920afd38ed14b7fee2ea5ec01ba -md5: bb5a90c93e3bac3d5690acf76b4a6386 -depends: -- libgcc >=13 -license: MIT -license_family: MIT -size: 8342 -timestamp: 1726803319942 -- conda: https://conda.anaconda.org/conda-forge/noarch/pyaml-env-1.2.2-pyhd8ed1ab_0.conda -sha256: 58994e0d2ea8584cb399546e6f6896d771995e6121d1a7b6a2c9948388358932 -md5: e17be1016bcc3516827b836cd3e4d9dc -depends: -- python >=3.9 -- pyyaml >=5.0,<=7.0 -license: MIT -license_family: MIT -size: 14645 -timestamp: 1736766960536 -- conda: https://conda.anaconda.org/conda-forge/noarch/pydantic-2.12.5-pyhcf101f3_1.conda -sha256: 868569d9505b7fe246c880c11e2c44924d7613a8cdcc1f6ef85d5375e892f13d -md5: c3946ed24acdb28db1b5d63321dbca7d -depends: -- 
typing-inspection >=0.4.2 -- typing_extensions >=4.14.1 -- python >=3.10 -- typing-extensions >=4.6.1 -- annotated-types >=0.6.0 -- pydantic-core ==2.41.5 -- python -license: MIT -license_family: MIT -size: 340482 -timestamp: 1764434463101 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/pydantic-core-2.41.5-py314h451b6cc_1.conda -sha256: f8acb2d03ebe80fed0032b9a989fc9acfb6735e3cd3f8c704b72728cb31868f6 -md5: 28f5027a1e04d67aa13fac1c5ba79693 -depends: -- python -- typing-extensions >=4.6.0,!=4.7.0 -- libgcc >=14 -- python 3.14.* *_cp314 -- python_abi 3.14.* *_cp314 -constrains: -- __glibc >=2.17 -license: MIT -license_family: MIT -size: 1828339 -timestamp: 1762989038561 -- conda: https://conda.anaconda.org/conda-forge/noarch/pygments-2.19.2-pyhd8ed1ab_0.conda -sha256: 5577623b9f6685ece2697c6eb7511b4c9ac5fb607c9babc2646c811b428fd46a -md5: 6b6ece66ebcae2d5f326c77ef2c5a066 -depends: -- python >=3.9 -license: BSD-2-Clause -license_family: BSD -size: 889287 -timestamp: 1750615908735 -- conda: https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha55dd90_7.conda -sha256: ba3b032fa52709ce0d9fd388f63d330a026754587a2f461117cac9ab73d8d0d8 -md5: 461219d1a5bd61342293efa2c0c90eac -depends: -- __unix -- python >=3.9 -license: BSD-3-Clause -license_family: BSD -size: 21085 -timestamp: 1733217331982 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/python-3.14.3-hb06a95a_101_cp314.conda -build_number: 101 -sha256: 87e9dff5646aba87cecfbc08789634c855871a7325169299d749040b0923a356 -md5: 205011b36899ff0edf41b3db0eda5a44 -depends: -- bzip2 >=1.0.8,<2.0a0 -- ld_impl_linux-aarch64 >=2.36.1 -- libexpat >=2.7.3,<3.0a0 -- libffi >=3.5.2,<3.6.0a0 -- libgcc >=14 -- liblzma >=5.8.2,<6.0a0 -- libmpdec >=4.0.0,<5.0a0 -- libsqlite >=3.51.2,<4.0a0 -- libuuid >=2.41.3,<3.0a0 -- libzlib >=1.3.1,<2.0a0 -- ncurses >=6.5,<7.0a0 -- openssl >=3.5.5,<4.0a0 -- python_abi 3.14.* *_cp314 -- readline >=8.3,<9.0a0 -- tk >=8.6.13,<8.7.0a0 -- tzdata -- zstd >=1.5.7,<1.6.0a0 
-license: Python-2.0 -size: 37305578 -timestamp: 1770674395875 -python_site_packages_path: lib/python3.14/site-packages -- conda: https://conda.anaconda.org/conda-forge/noarch/python-dotenv-1.2.2-pyhcf101f3_0.conda -sha256: 74e417a768f59f02a242c25e7db0aa796627b5bc8c818863b57786072aeb85e5 -md5: 130584ad9f3a513cdd71b1fdc1244e9c -depends: -- python >=3.10 -license: BSD-3-Clause -license_family: BSD -size: 27848 -timestamp: 1772388605021 -- conda: https://conda.anaconda.org/conda-forge/noarch/python-gil-3.14.3-h4df99d1_101.conda -sha256: 233aebd94c704ac112afefbb29cf4170b7bc606e22958906f2672081bc50638a -md5: 235765e4ea0d0301c75965985163b5a1 -depends: -- cpython 3.14.3.* -- python_abi * *_cp314 -license: Python-2.0 -size: 50062 -timestamp: 1770674497152 -- conda: https://conda.anaconda.org/conda-forge/noarch/python-kaleido-0.2.1-pyhd8ed1ab_0.tar.bz2 -sha256: e17bf63a30aec33432f1ead86e15e9febde9fc40a7f869c0e766be8d2db44170 -md5: 310259a5b03ff02289d7705f39e2b1d2 -depends: -- kaleido-core 0.2.1.* -- python >=3.5 -license: MIT -license_family: MIT -size: 18320 -timestamp: 1615204747600 -- conda: https://conda.anaconda.org/conda-forge/noarch/python_abi-3.14-8_cp314.conda -build_number: 8 -sha256: ad6d2e9ac39751cc0529dd1566a26751a0bf2542adb0c232533d32e176e21db5 -md5: 0539938c55b6b1a59b560e843ad864a4 -constrains: -- python 3.14.* *_cp314 -license: BSD-3-Clause -license_family: BSD -size: 6989 -timestamp: 1752805904792 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/pyyaml-6.0.3-py314h807365f_1.conda -sha256: 496b5e65dfdd0aaaaa5de0dcaaf3bceea00fcb4398acf152f89e567c82ec1046 -md5: 9ae2c92975118058bd720e9ba2bb7c58 -depends: -- libgcc >=14 -- python >=3.14,<3.15.0a0 -- python >=3.14,<3.15.0a0 *_cp314 -- python_abi 3.14.* *_cp314 -- yaml >=0.2.5,<0.3.0a0 -license: MIT -license_family: MIT -size: 195678 -timestamp: 1770223441816 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/readline-8.3-hb682ff5_0.conda -sha256: 
fe695f9d215e9a2e3dd0ca7f56435ab4df24f5504b83865e3d295df36e88d216 -md5: 3d49cad61f829f4f0e0611547a9cda12 -depends: -- libgcc >=14 -- ncurses >=6.5,<7.0a0 -license: GPL-3.0-only -license_family: GPL -size: 357597 -timestamp: 1765815673644 -- conda: https://conda.anaconda.org/conda-forge/noarch/referencing-0.37.0-pyhcf101f3_0.conda -sha256: 0577eedfb347ff94d0f2fa6c052c502989b028216996b45c7f21236f25864414 -md5: 870293df500ca7e18bedefa5838a22ab -depends: -- attrs >=22.2.0 -- python >=3.10 -- rpds-py >=0.7.0 -- typing_extensions >=4.4.0 -- python -license: MIT -license_family: MIT -size: 51788 -timestamp: 1760379115194 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/regex-2026.2.28-py314h51f160d_0.conda -sha256: 2080ecea825e1ef91a2422cc0bc63e85db9e38908ed17657fb8f41de7a6eee71 -md5: 818aa2c9f6b3c808da5e7be22a9a424c -depends: -- libgcc >=14 -- python >=3.14,<3.15.0a0 -- python >=3.14,<3.15.0a0 *_cp314 -- python_abi 3.14.* *_cp314 -license: Apache-2.0 AND CNRI-Python -license_family: PSF -size: 408097 -timestamp: 1772255205521 -- conda: https://conda.anaconda.org/conda-forge/noarch/requests-2.32.5-pyhcf101f3_1.conda -sha256: 7813c38b79ae549504b2c57b3f33394cea4f2ad083f0994d2045c2e24cb538c5 -md5: c65df89a0b2e321045a9e01d1337b182 -depends: -- python >=3.10 -- certifi >=2017.4.17 -- charset-normalizer >=2,<4 -- idna >=2.5,<4 -- urllib3 >=1.21.1,<3 -- python -constrains: -- chardet >=3.0.2,<6 -license: Apache-2.0 -license_family: APACHE -size: 63602 -timestamp: 1766926974520 -- conda: https://conda.anaconda.org/conda-forge/noarch/rich-14.3.3-pyhcf101f3_0.conda -sha256: b06ce84d6a10c266811a7d3adbfa1c11f13393b91cc6f8a5b468277d90be9590 -md5: 7a6289c50631d620652f5045a63eb573 -depends: -- markdown-it-py >=2.2.0 -- pygments >=2.13.0,<3.0.0 -- python >=3.10 -- typing_extensions >=4.0.0,<5.0.0 -- python -license: MIT -license_family: MIT -size: 208472 -timestamp: 1771572730357 -- conda: https://conda.anaconda.org/conda-forge/noarch/rich-click-1.9.7-pyh8f84b5b_0.conda 
-sha256: aa3fcb167321bae51998de2e94d199109c9024f25a5a063cb1c28d8f1af33436 -md5: 0c20a8ebcddb24a45da89d5e917e6cb9 -depends: -- python >=3.10 -- rich >=12 -- click >=8 -- typing-extensions >=4 -- __unix -- python -license: MIT -license_family: MIT -size: 64356 -timestamp: 1769850479089 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/rpds-py-0.30.0-py314h02b7a91_0.conda -sha256: a587240f16eac7c6a80f9585cef679cd1cb9a287b8dfcdd36dcef1f7e7db15dc -md5: e7f6ed9e60043bb5cbcc527764897f0d -depends: -- python -- libgcc >=14 -- python_abi 3.14.* *_cp314 -constrains: -- __glibc >=2.17 -license: MIT -license_family: MIT -size: 376332 -timestamp: 1764543345455 -- conda: https://conda.anaconda.org/conda-forge/noarch/spectra-0.0.11-pyhd8ed1ab_2.conda -sha256: 7c65782d2511738e62c70462e89d65da4fa54d5a7e47c46667bcd27a59f81876 -md5: 472239e4eb7b5a84bb96b3ed7e3a596a -depends: -- colormath >=3.0.0 -- python >=3.9 -license: MIT -license_family: MIT -size: 22284 -timestamp: 1735770589188 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/sqlite-3.52.0-hf1c7be2_0.conda -sha256: 4f8523f5341f0d9e1547085206c6c1f71f9fc7c277443ca363a8cf98add8fc01 -md5: d9634079df93a65ee045b3c75f35cae1 -depends: -- icu >=78.2,<79.0a0 -- libgcc >=14 -- libsqlite 3.52.0 h10b116e_0 -- libzlib >=1.3.1,<2.0a0 -- ncurses >=6.5,<7.0a0 -- readline >=8.3,<9.0a0 -license: blessing -size: 209416 -timestamp: 1772818891689 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/tiktoken-0.12.0-py314h6a36e60_3.conda -sha256: c1da41c79262b27efa168407cfecc47b20270e5fc071a8307f95a2c85fb94170 -md5: 55bf7b559202236157b14323b40f19e6 -depends: -- libgcc >=14 -- libstdcxx >=14 -- python >=3.14,<3.15.0a0 -- python_abi 3.14.* *_cp314 -- regex >=2022.1.18 -- requests >=2.26.0 -constrains: -- __glibc >=2.17 -license: MIT -license_family: MIT -size: 914402 -timestamp: 1764030357702 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/tk-8.6.13-noxft_h0dc03b3_103.conda -sha256: 
e25c314b52764219f842b41aea2c98a059f06437392268f09b03561e4f6e5309 -md5: 7fc6affb9b01e567d2ef1d05b84aa6ed -depends: -- libgcc >=14 -- libzlib >=1.3.1,<2.0a0 -constrains: -- xorg-libx11 >=1.8.12,<2.0a0 -license: TCL -license_family: BSD -size: 3368666 -timestamp: 1769464148928 -- conda: https://conda.anaconda.org/conda-forge/noarch/tqdm-4.67.3-pyh8f84b5b_0.conda -sha256: 9ef8e47cf00e4d6dcc114eb32a1504cc18206300572ef14d76634ba29dfe1eb6 -md5: e5ce43272193b38c2e9037446c1d9206 -depends: -- python >=3.10 -- __unix -- python -license: MPL-2.0 and MIT -size: 94132 -timestamp: 1770153424136 -- conda: https://conda.anaconda.org/conda-forge/noarch/typeguard-4.5.1-pyhd8ed1ab_0.conda -sha256: 39d8ae33c43cdb8f771373e149b0b4fae5a08960ac58dcca95b2f1642bb17448 -md5: 260af1b0a94f719de76b4e14094e9a3b -depends: -- importlib-metadata >=3.6 -- python >=3.10 -- typing-extensions >=4.10.0 -- typing_extensions >=4.14.0 -constrains: -- pytest >=7 -license: MIT -license_family: MIT -size: 36838 -timestamp: 1771532971545 -- conda: https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.15.0-h396c80c_0.conda -sha256: 7c2df5721c742c2a47b2c8f960e718c930031663ac1174da67c1ed5999f7938c -md5: edd329d7d3a4ab45dcf905899a7a6115 -depends: -- typing_extensions ==4.15.0 pyhcf101f3_0 -license: PSF-2.0 -license_family: PSF -size: 91383 -timestamp: 1756220668932 -- conda: https://conda.anaconda.org/conda-forge/noarch/typing-inspection-0.4.2-pyhd8ed1ab_1.conda -sha256: 70db27de58a97aeb7ba7448366c9853f91b21137492e0b4430251a1870aa8ff4 -md5: a0a4a3035667fc34f29bfbd5c190baa6 -depends: -- python >=3.10 -- typing_extensions >=4.12.0 -license: MIT -license_family: MIT -size: 18923 -timestamp: 1764158430324 -- conda: https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.15.0-pyhcf101f3_0.conda -sha256: 032271135bca55aeb156cee361c81350c6f3fb203f57d024d7e5a1fc9ef18731 -md5: 0caa1af407ecff61170c9437a808404d -depends: -- python >=3.10 -- python -license: PSF-2.0 -license_family: PSF -size: 51692 
-timestamp: 1756220668932 -- conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2025c-hc9c84f9_1.conda -sha256: 1d30098909076af33a35017eed6f2953af1c769e273a0626a04722ac4acaba3c -md5: ad659d0a2b3e47e38d829aa8cad2d610 -license: LicenseRef-Public-Domain -size: 119135 -timestamp: 1767016325805 -- conda: https://conda.anaconda.org/conda-forge/noarch/urllib3-2.6.3-pyhd8ed1ab_0.conda -sha256: af641ca7ab0c64525a96fd9ad3081b0f5bcf5d1cbb091afb3f6ed5a9eee6111a -md5: 9272daa869e03efe68833e3dc7a02130 -depends: -- backports.zstd >=1.0.0 -- brotli-python >=1.2.0 -- h2 >=4,<5 -- pysocks >=1.5.6,<2.0,!=1.5.7 -- python >=3.10 -license: MIT -license_family: MIT -size: 103172 -timestamp: 1767817860341 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxau-1.0.12-he30d5cf_1.conda -sha256: e9f6e931feeb2f40e1fdbafe41d3b665f1ab6cb39c5880a1fcf9f79a3f3c84a5 -md5: 1c246e1105000c3660558459e2fd6d43 -depends: -- libgcc >=14 -license: MIT -license_family: MIT -size: 16317 -timestamp: 1762977521691 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxdmcp-1.1.5-he30d5cf_1.conda -sha256: 128d72f36bcc8d2b4cdbec07507542e437c7d67f677b7d77b71ed9eeac7d6df1 -md5: bff06dcde4a707339d66d45d96ceb2e2 -depends: -- libgcc >=14 -license: MIT -license_family: MIT -size: 21039 -timestamp: 1762979038025 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/yaml-0.2.5-h80f16a2_3.conda -sha256: 66265e943f32ce02396ad214e27cb35f5b0490b3bd4f064446390f9d67fa5d88 -md5: 032d8030e4a24fe1f72c74423a46fb88 -depends: -- libgcc >=14 -license: MIT -license_family: MIT -size: 88088 -timestamp: 1753484092643 -- conda: https://conda.anaconda.org/conda-forge/noarch/zipp-3.23.0-pyhcf101f3_1.conda -sha256: b4533f7d9efc976511a73ef7d4a2473406d7f4c750884be8e8620b0ce70f4dae -md5: 30cd29cb87d819caead4d55184c1d115 -depends: -- python >=3.10 -- python -license: MIT -license_family: MIT -size: 24194 -timestamp: 1764460141901 -- conda: 
https://conda.anaconda.org/conda-forge/linux-aarch64/zlib-ng-2.3.3-ha7cb516_1.conda -sha256: 638a3a41a4fbfed52d3c60c8ef5a3693b3f12a5b1a3f58fa29f5698d0a0702e2 -md5: f731af71c723065d91b4c01bb822641b -depends: -- libgcc >=14 -- libstdcxx >=14 -license: Zlib -license_family: Other -size: 121046 -timestamp: 1770167944449 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/zstd-1.5.7-h85ac4a6_6.conda -sha256: 569990cf12e46f9df540275146da567d9c618c1e9c7a0bc9d9cfefadaed20b75 -md5: c3655f82dcea2aa179b291e7099c1fcc -depends: -- libzlib >=1.3.1,<2.0a0 -license: BSD-3-Clause -license_family: BSD -size: 614429 -timestamp: 1764777145593 diff --git a/modules/nf-core/multiqc/.conda-lock/linux_arm64-bd-d167b8012595a136_1.txt b/modules/nf-core/multiqc/.conda-lock/linux_arm64-bd-d167b8012595a136_1.txt deleted file mode 100644 index f787dbe1..00000000 --- a/modules/nf-core/multiqc/.conda-lock/linux_arm64-bd-d167b8012595a136_1.txt +++ /dev/null @@ -1,125 +0,0 @@ - -# This file may be used to create an environment using: -# $ conda create --name --file -# platform: linux-aarch64 -@EXPLICIT -https://conda.anaconda.org/conda-forge/linux-aarch64/libgomp-15.2.0-h8acb6b2_18.conda#4faa39bf919939602e594253bd673958 -https://conda.anaconda.org/conda-forge/linux-aarch64/_openmp_mutex-4.5-20_gnu.conda#468fd3bb9e1f671d36c2cbc677e56f1d -https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-15.2.0-h8acb6b2_18.conda#552567ea2b61e3a3035759b2fdb3f9a6 -https://conda.anaconda.org/conda-forge/linux-aarch64/bzip2-1.0.8-h4777abc_9.conda#840d8fc0d7b3209be93080bc20e07f2d -https://conda.anaconda.org/conda-forge/linux-aarch64/libzlib-1.3.2-hdc9db2a_2.conda#502006882cf5461adced436e410046d1 -https://conda.anaconda.org/conda-forge/linux-aarch64/zstd-1.5.7-h85ac4a6_6.conda#c3655f82dcea2aa179b291e7099c1fcc -https://conda.anaconda.org/conda-forge/linux-aarch64/ld_impl_linux-aarch64-2.45.1-default_h1979696_102.conda#a21644fc4a83da26452a718dc9468d5f 
-https://conda.anaconda.org/conda-forge/linux-aarch64/libexpat-2.7.5-hfae3067_0.conda#05d1e0b30acd816a192c03dc6e164f4d -https://conda.anaconda.org/conda-forge/linux-aarch64/libffi-3.5.2-h376a255_0.conda#2f364feefb6a7c00423e80dcb12db62a -https://conda.anaconda.org/conda-forge/linux-aarch64/liblzma-5.8.3-he30d5cf_0.conda#76298a9e6d71ee6e832a8d0d7373b261 -https://conda.anaconda.org/conda-forge/linux-aarch64/libmpdec-4.0.0-he30d5cf_1.conda#7b9813e885482e3ccb1fa212b86d7fd0 -https://conda.anaconda.org/conda-forge/linux-aarch64/libsqlite-3.53.0-h022381a_0.conda#86db4036fd08bf34e991bf48a8af405d -https://conda.anaconda.org/conda-forge/linux-aarch64/libuuid-2.42-h1022ec0_0.conda#a0b5de740d01c390bdbb46d7503c9fab -https://conda.anaconda.org/conda-forge/linux-aarch64/ncurses-6.5-ha32ae93_3.conda#182afabe009dc78d8b73100255ee6868 -https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2026.4.22-hbd8a1cb_0.conda#e18ad67cf881dcadee8b8d9e2f8e5f73 -https://conda.anaconda.org/conda-forge/linux-aarch64/openssl-3.6.2-h546c87b_0.conda#3b129669089e4d6a5c6871dbb4669b99 -https://conda.anaconda.org/conda-forge/noarch/python_abi-3.14-8_cp314.conda#0539938c55b6b1a59b560e843ad864a4 -https://conda.anaconda.org/conda-forge/linux-aarch64/readline-8.3-hb682ff5_0.conda#3d49cad61f829f4f0e0611547a9cda12 -https://conda.anaconda.org/conda-forge/linux-aarch64/tk-8.6.13-noxft_h0dc03b3_103.conda#7fc6affb9b01e567d2ef1d05b84aa6ed -https://conda.anaconda.org/conda-forge/noarch/tzdata-2025c-hc9c84f9_1.conda#ad659d0a2b3e47e38d829aa8cad2d610 -https://conda.anaconda.org/conda-forge/linux-aarch64/python-3.14.4-hfd9ac0a_100_cp314.conda#3cfbe780f0f51cc8cba41db9f8a28bfe -https://conda.anaconda.org/conda-forge/noarch/cpython-3.14.4-py314hd8ed1ab_100.conda#f111d4cfaf1fe9496f386bc98ae94452 -https://conda.anaconda.org/conda-forge/noarch/python-gil-3.14.4-h4df99d1_100.conda#e4e60721757979d01d3964122f674959 
-https://conda.anaconda.org/conda-forge/noarch/_python_abi3_support-1.0-hd8ed1ab_2.conda#aaa2a381ccc56eac91d63b6c1240312f -https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.15.0-pyhcf101f3_0.conda#0caa1af407ecff61170c9437a808404d -https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.15.0-h396c80c_0.conda#edd329d7d3a4ab45dcf905899a7a6115 -https://conda.anaconda.org/conda-forge/noarch/annotated-types-0.7.0-pyhd8ed1ab_1.conda#2934f256a8acfe48f6ebb4fce6cde29c -https://conda.anaconda.org/conda-forge/noarch/attrs-26.1.0-pyhcf101f3_0.conda#c6b0543676ecb1fb2d7643941fe375f2 -https://conda.anaconda.org/conda-forge/noarch/backports.zstd-1.3.0-py314h680f03e_0.conda#a2ac7763a9ac75055b68f325d3255265 -https://conda.anaconda.org/conda-forge/linux-aarch64/libstdcxx-15.2.0-hef695bb_18.conda#f56573d05e3b735cb03efeb64a15f388 -https://conda.anaconda.org/conda-forge/linux-aarch64/brotli-python-1.2.0-py314h352cb57_1.conda#a1b5c571a0923a205d663d8678df4792 -https://conda.anaconda.org/conda-forge/noarch/certifi-2026.4.22-pyhd8ed1ab_0.conda#929471569c93acefb30282a22060dcd5 -https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.4.7-pyhd8ed1ab_0.conda#a9167b9571f3baa9d448faa2139d1089 -https://conda.anaconda.org/conda-forge/noarch/click-8.3.2-pyhc90fa1f_0.conda#4d18bc3af7cfcea97bd817164672a08c -https://conda.anaconda.org/conda-forge/noarch/humanfriendly-10.0-pyh707e725_8.conda#7fe569c10905402ed47024fc481bb371 -https://conda.anaconda.org/conda-forge/noarch/coloredlogs-15.0.1-pyhd8ed1ab_4.conda#b866ff7007b934d564961066c8195983 -https://conda.anaconda.org/conda-forge/noarch/networkx-3.6.1-pyhcf101f3_0.conda#a2c1eeadae7a309daed9d62c96012a2b -https://conda.anaconda.org/conda-forge/linux-aarch64/libgfortran5-15.2.0-h1b7bec0_18.conda#574d88ce3348331e962cfa5ed451b247 -https://conda.anaconda.org/conda-forge/linux-aarch64/libgfortran-15.2.0-he9431aa_18.conda#41f261f5e4e2e8cbd236c2f1f15dae1b 
-https://conda.anaconda.org/conda-forge/linux-aarch64/libopenblas-0.3.32-pthreads_h9d3fd7e_0.conda#5d2ce5cf40443d055ec6d33840192265 -https://conda.anaconda.org/conda-forge/linux-aarch64/libblas-3.11.0-6_haddc8a3_openblas.conda#652bb20bb4618cacd11e17ae070f47ce -https://conda.anaconda.org/conda-forge/linux-aarch64/libcblas-3.11.0-6_hd72aa62_openblas.conda#939e300b110db241a96a1bed438c315b -https://conda.anaconda.org/conda-forge/linux-aarch64/liblapack-3.11.0-6_h88aeb00_openblas.conda#e23a27b52fb320687239e2c5ae4d7540 -https://conda.anaconda.org/conda-forge/linux-aarch64/numpy-2.4.3-py314haac167e_0.conda#25d896c331481145720a21e5145fad65 -https://conda.anaconda.org/conda-forge/noarch/colormath-3.0.0-pyhd8ed1ab_4.conda#071cf7b0ce333c81718b054066c15102 -https://conda.anaconda.org/conda-forge/linux-aarch64/expat-2.7.5-hfae3067_0.conda#d2bb0c889d94f2fdc5856392c3002976 -https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45 -https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6 -https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb -https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_3.conda#49023d73832ef61042f6a237cb2687e7 -https://conda.anaconda.org/conda-forge/linux-aarch64/libpng-1.6.58-h1abf092_0.conda#f51503ac45a4888bce71af9027a2ecc9 -https://conda.anaconda.org/conda-forge/linux-aarch64/libfreetype6-2.14.3-hdae7a39_0.conda#b99ed99e42dafb27889483b3098cace7 -https://conda.anaconda.org/conda-forge/linux-aarch64/libfreetype-2.14.3-h8af1aa0_0.conda#a229e22d4d8814a07702b0919d8e6701 -https://conda.anaconda.org/conda-forge/linux-aarch64/fontconfig-2.17.1-hba86a56_0.conda#0fed1ff55f4938a65907f3ecf62609db -https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-hc364b38_1.conda#a7970cd949a077b7cb9696379d338681 
-https://conda.anaconda.org/conda-forge/noarch/hpack-4.1.0-pyhd8ed1ab_0.conda#0a802cb9888dd14eeefc611f05c40b6e -https://conda.anaconda.org/conda-forge/noarch/hyperframe-6.1.0-pyhd8ed1ab_0.conda#8e6923fc12f1fe8f8c4e5c9f343256ac -https://conda.anaconda.org/conda-forge/noarch/h2-4.3.0-pyhcf101f3_0.conda#164fc43f0b53b6e3a7bc7dce5e4f1dc9 -https://conda.anaconda.org/conda-forge/noarch/humanize-4.15.0-pyhd8ed1ab_0.conda#daddf757c3ecd6067b9af1df1f25d89e -https://conda.anaconda.org/conda-forge/noarch/idna-3.13-pyhcf101f3_0.conda#fb7130c190f9b4ec91219840a05ba3ac -https://conda.anaconda.org/conda-forge/noarch/zipp-3.23.1-pyhcf101f3_0.conda#e1c36c6121a7c9c76f2f148f1e83b983 -https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-8.8.0-pyhcf101f3_0.conda#080594bf4493e6bae2607e65390c520a -https://conda.anaconda.org/conda-forge/linux-aarch64/markupsafe-3.0.3-py314hb76de3f_1.conda#e5de3c36dd548b35ff2a8aa49208dcb3 -https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.6-pyhcf101f3_1.conda#04558c96691bed63104678757beb4f8d -https://conda.anaconda.org/conda-forge/linux-aarch64/rpds-py-0.30.0-py314h02b7a91_0.conda#e7f6ed9e60043bb5cbcc527764897f0d -https://conda.anaconda.org/conda-forge/noarch/referencing-0.37.0-pyhcf101f3_0.conda#870293df500ca7e18bedefa5838a22ab -https://conda.anaconda.org/conda-forge/noarch/jsonschema-specifications-2025.9.1-pyhcf101f3_0.conda#439cd0f567d697b20a8f45cb70a1005a -https://conda.anaconda.org/conda-forge/noarch/jsonschema-4.26.0-pyhcf101f3_0.conda#ada41c863af263cc4c5fcbaff7c3e4dc -https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-ng-15.2.0-he9431aa_18.conda#4feebd0fbf61075a1a9c2e9b3936c257 -https://conda.anaconda.org/conda-forge/linux-aarch64/mathjax-2.7.7-h8af1aa0_3.tar.bz2#7b08314a6867a9d5648a1c3265e9eb8e -https://conda.anaconda.org/conda-forge/linux-aarch64/nspr-4.38-h3ad9384_0.conda#6dd4f07147774bf720075a210f8026b9 -https://conda.anaconda.org/conda-forge/linux-aarch64/nss-3.118-h544fa81_0.conda#4540f9570d12db2150f42ba036154552 
-https://conda.anaconda.org/conda-forge/linux-aarch64/sqlite-3.53.0-he8854b5_0.conda#ad8164bdeece883b825c50639c0c4725 -https://conda.anaconda.org/conda-forge/linux-aarch64/kaleido-core-0.2.1-he5a581e_0.tar.bz2#4f0d284f5d11e04277b552eb1c172c7f -https://conda.anaconda.org/conda-forge/linux-aarch64/libjpeg-turbo-3.1.4.1-he30d5cf_0.conda#a85ba48648f6868016f2741fd9170250 -https://conda.anaconda.org/conda-forge/linux-aarch64/lerc-4.1.0-h52b7260_0.conda#d13423b06447113a90b5b1366d4da171 -https://conda.anaconda.org/conda-forge/linux-aarch64/libdeflate-1.25-h1af38f5_0.conda#a9138815598fe6b91a1d6782ca657b0c -https://conda.anaconda.org/conda-forge/linux-aarch64/libwebp-base-1.6.0-ha2e29f5_0.conda#24e92d0942c799db387f5c9d7b81f1af -https://conda.anaconda.org/conda-forge/linux-aarch64/libtiff-4.7.1-hdb009f0_1.conda#8c6fd84f9c87ac00636007c6131e457d -https://conda.anaconda.org/conda-forge/linux-aarch64/lcms2-2.18-h9d5b58d_0.conda#bb960f01525b5e001608afef9d47b79c -https://conda.anaconda.org/conda-forge/linux-aarch64/pthread-stubs-0.4-h86ecc28_1002.conda#bb5a90c93e3bac3d5690acf76b4a6386 -https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxau-1.0.12-he30d5cf_1.conda#1c246e1105000c3660558459e2fd6d43 -https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxdmcp-1.1.5-he30d5cf_1.conda#bff06dcde4a707339d66d45d96ceb2e2 -https://conda.anaconda.org/conda-forge/linux-aarch64/libxcb-1.17.0-h262b8f6_0.conda#cd14ee5cca2464a425b1dbfc24d90db2 -https://conda.anaconda.org/conda-forge/noarch/markdown-3.10.2-pyhcf101f3_0.conda#ba0a9221ce1063f31692c07370d062f3 -https://conda.anaconda.org/conda-forge/noarch/mdurl-0.1.2-pyhd8ed1ab_1.conda#592132998493b3ff25fd7479396e8351 -https://conda.anaconda.org/conda-forge/noarch/markdown-it-py-4.0.0-pyhd8ed1ab_0.conda#5b5203189eb668f042ac2b0826244964 -https://conda.anaconda.org/conda-forge/noarch/natsort-8.4.0-pyhcf101f3_2.conda#e941e85e273121222580723010bd4fa2 
-https://conda.anaconda.org/conda-forge/noarch/packaging-26.1-pyhc364b38_0.conda#b8ae38639d323d808da535fb71e31be8 -https://conda.anaconda.org/conda-forge/linux-aarch64/openjpeg-2.5.4-h5da879a_0.conda#cea962410e327262346d48d01f05936c -https://conda.anaconda.org/conda-forge/linux-aarch64/zlib-ng-2.3.3-ha7cb516_1.conda#f731af71c723065d91b4c01bb822641b -https://conda.anaconda.org/conda-forge/linux-aarch64/pillow-12.2.0-py314hac3e5ec_0.conda#87d58d103b47c4a8567b3d7666647684 -https://conda.anaconda.org/conda-forge/noarch/narwhals-2.20.0-pyhcf101f3_0.conda#6cac1a50359219d786453c6fef819f98 -https://conda.anaconda.org/conda-forge/noarch/plotly-6.6.0-pyhd8ed1ab_0.conda#3e9427ee186846052e81fadde8ebe96a -https://conda.anaconda.org/conda-forge/linux-aarch64/polars-runtime-32-1.40.0-py310hff09b76_0.conda#d5628a33ce7652511e38fc98643dc910 -https://conda.anaconda.org/conda-forge/noarch/polars-1.40.0-pyh58ad624_0.conda#fd16be490f5403adfbf27dd4901bbe34 -https://conda.anaconda.org/conda-forge/linux-aarch64/polars-runtime-compat-1.40.0-py310hf00a4a2_0.conda#a82af0fcbb72db253dc89a7a45279372 -https://conda.anaconda.org/conda-forge/noarch/polars-lts-cpu-1.34.0.deprecated-hc364b38_0.conda#ef0340e75068ac8ff96462749b5c98e7 -https://conda.anaconda.org/conda-forge/linux-aarch64/yaml-0.2.5-h80f16a2_3.conda#032d8030e4a24fe1f72c74423a46fb88 -https://conda.anaconda.org/conda-forge/linux-aarch64/pyyaml-6.0.3-py314h807365f_1.conda#9ae2c92975118058bd720e9ba2bb7c58 -https://conda.anaconda.org/conda-forge/noarch/pyaml-env-1.2.2-pyhd8ed1ab_0.conda#e17be1016bcc3516827b836cd3e4d9dc -https://conda.anaconda.org/conda-forge/linux-aarch64/pydantic-core-2.46.3-py314h451b6cc_0.conda#1a2cb55be9a153ad6203bff6b787c240 -https://conda.anaconda.org/conda-forge/noarch/typing-inspection-0.4.2-pyhd8ed1ab_1.conda#a0a4a3035667fc34f29bfbd5c190baa6 -https://conda.anaconda.org/conda-forge/noarch/pydantic-2.13.3-pyhcf101f3_0.conda#f690e6f204efd2e5c06b57518a383d98 
-https://conda.anaconda.org/conda-forge/noarch/python-dotenv-1.2.2-pyhcf101f3_0.conda#130584ad9f3a513cdd71b1fdc1244e9c -https://conda.anaconda.org/conda-forge/noarch/python-kaleido-0.2.1-pyhd8ed1ab_0.tar.bz2#310259a5b03ff02289d7705f39e2b1d2 -https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha55dd90_7.conda#461219d1a5bd61342293efa2c0c90eac -https://conda.anaconda.org/conda-forge/noarch/urllib3-2.6.3-pyhd8ed1ab_0.conda#9272daa869e03efe68833e3dc7a02130 -https://conda.anaconda.org/conda-forge/noarch/requests-2.33.1-pyhcf101f3_0.conda#10afbb4dbf06ff959ad25a92ccee6e59 -https://conda.anaconda.org/conda-forge/noarch/pygments-2.20.0-pyhd8ed1ab_0.conda#16c18772b340887160c79a6acc022db0 -https://conda.anaconda.org/conda-forge/noarch/rich-15.0.0-pyhcf101f3_0.conda#0242025a3c804966bf71aa04eee82f66 -https://conda.anaconda.org/conda-forge/noarch/rich-click-1.9.7-pyh8f84b5b_0.conda#0c20a8ebcddb24a45da89d5e917e6cb9 -https://conda.anaconda.org/conda-forge/noarch/spectra-0.0.11-pyhd8ed1ab_2.conda#472239e4eb7b5a84bb96b3ed7e3a596a -https://conda.anaconda.org/conda-forge/linux-aarch64/regex-2026.4.4-py314h51f160d_0.conda#88a3dbd279e6b1faf0cddb8397866864 -https://conda.anaconda.org/conda-forge/linux-aarch64/tiktoken-0.12.0-py314h6a36e60_3.conda#55bf7b559202236157b14323b40f19e6 -https://conda.anaconda.org/conda-forge/noarch/tqdm-4.67.3-pyh8f84b5b_0.conda#e5ce43272193b38c2e9037446c1d9206 -https://conda.anaconda.org/conda-forge/noarch/typeguard-4.5.1-pyhd8ed1ab_0.conda#260af1b0a94f719de76b4e14094e9a3b -https://conda.anaconda.org/bioconda/noarch/multiqc-1.34-pyhdfd78af_0.conda#a7111ab9a6a6146b40cbce16655ac873 -https://conda.anaconda.org/conda-forge/noarch/pip-26.0.1-pyh145f28c_0.conda#09a970fbf75e8ed1aa633827ded6aa4f -https://conda.anaconda.org/conda-forge/linux-aarch64/procps-ng-4.0.6-h1779866_0.conda#ab7288cc39545556d1bc5e71ab2df9a9 diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index 37e7612d..009874d4 100644 --- 
a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::multiqc=1.34 + - bioconda::multiqc=1.33 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index e80e8cd8..ae8f5c86 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -3,9 +3,7 @@ process MULTIQC { label 'process_single' conda "${moduleDir}/environment.yml" - container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container - ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/1b/1bef8af6be88c5733461959c46ac8ef73d18f65277f62a1695d0e1633054f9c2/data' - : 'community.wave.seqera.io/library/multiqc:1.34--db7c73dae76bc9e6'}" + container "community.wave.seqera.io/library/multiqc-xenium-extra_multiqc_polars_scanpy_scipy:4e27199c6ca05c8b" input: tuple val(meta), path(multiqc_files, stageAs: "?/*"), path(multiqc_config, stageAs: "?/*"), path(multiqc_logo), path(replace_names), path(sample_names) diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index 2facc627..ef434a9a 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -110,24 +110,24 @@ maintainers: containers: conda: linux/amd64: - lock_file: modules/nf-core/multiqc/.conda-lock/linux_amd64-bd-db7c73dae76bc9e6_1.txt + lock_file: https://wave.seqera.io/v1alpha1/builds/bd-ee7739d47738383b_1/condalock linux/arm64: - lock_file: modules/nf-core/multiqc/.conda-lock/linux_arm64-bd-d167b8012595a136_1.txt + lock_file: https://wave.seqera.io/v1alpha1/builds/bd-58d7dee710ab3aa8_1/condalock docker: linux/amd64: - name: community.wave.seqera.io/library/multiqc:1.34--db7c73dae76bc9e6 - build_id: bd-db7c73dae76bc9e6_1 - scan_id: sc-66fc7138dbf1cf48_1 + build_id: bd-ee7739d47738383b_1 + name: community.wave.seqera.io/library/multiqc:1.33--ee7739d47738383b + scanId: 
sc-6ddec592dcadd583_4 linux/arm64: - name: community.wave.seqera.io/library/multiqc:1.34--d167b8012595a136 - build_id: bd-d167b8012595a136_1 - scan_id: sc-ac701dfa631a2af9_1 + build_id: bd-58d7dee710ab3aa8_1 + name: community.wave.seqera.io/library/multiqc:1.33--58d7dee710ab3aa8 + scanId: sc-a04c42273e34c55c_2 singularity: linux/amd64: - name: oras://community.wave.seqera.io/library/multiqc:1.34--4fc8657c816047c0 - build_id: bd-4fc8657c816047c0_1 - https: https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/1b/1bef8af6be88c5733461959c46ac8ef73d18f65277f62a1695d0e1633054f9c2/data + build_id: bd-e3576ddf588fa00d_1 + https: https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/34/34e733a9ae16a27e80fe00f863ea1479c96416017f24a907996126283e7ecd4d/data + name: oras://community.wave.seqera.io/library/multiqc:1.33--e3576ddf588fa00d linux/arm64: - name: oras://community.wave.seqera.io/library/multiqc:1.34--7fbd82d945c06726 - build_id: bd-7fbd82d945c06726_1 - https: https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/9a/9a1fec9662a152683e6fcae440d0ce20920b3b89dc62d1e3a52e73f92eba0969/data + build_id: bd-2537ca5f8445e3c2_1 + https: https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/78/78b89e91d89e9cc99ad5ade5be311f347838cb2acbfb4f13bc343b170be09ce4/data + name: oras://community.wave.seqera.io/library/multiqc:1.33--2537ca5f8445e3c2 diff --git a/modules/nf-core/multiqc/multiqc.diff b/modules/nf-core/multiqc/multiqc.diff new file mode 100644 index 00000000..a208f7c0 --- /dev/null +++ b/modules/nf-core/multiqc/multiqc.diff @@ -0,0 +1,23 @@ +Changes in component 'nf-core/multiqc' +Changes in 'multiqc/main.nf': +--- modules/nf-core/multiqc/main.nf ++++ modules/nf-core/multiqc/main.nf +@@ -3,9 +3,7 @@ + label 'process_single' + + conda "${moduleDir}/environment.yml" +- container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container +- ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/34/34e733a9ae16a27e80fe00f863ea1479c96416017f24a907996126283e7ecd4d/data' +- : 'community.wave.seqera.io/library/multiqc:1.33--ee7739d47738383b'}" ++ container "community.wave.seqera.io/library/multiqc-xenium-extra_multiqc_polars_scanpy_scipy:4e27199c6ca05c8b" + + input: + tuple val(meta), path(multiqc_files, stageAs: "?/*"), path(multiqc_config, stageAs: "?/*"), path(multiqc_logo), path(replace_names), path(sample_names) + +'modules/nf-core/multiqc/environment.yml' is unchanged +'modules/nf-core/multiqc/meta.yml' is unchanged +'modules/nf-core/multiqc/tests/nextflow.config' is unchanged +'modules/nf-core/multiqc/tests/main.nf.test' is unchanged +'modules/nf-core/multiqc/tests/main.nf.test.snap' is unchanged +'modules/nf-core/multiqc/tests/custom_prefix.config' is unchanged +************************************************************ diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap index 7c2f370f..3bfc524f 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test.snap +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -81,7 +81,7 @@ [ "MULTIQC", "multiqc", - "1.34" + "1.33" ] ] } @@ -175,7 +175,7 @@ [ "MULTIQC", "multiqc", - "1.34" + "1.33" ] ] } @@ -221,7 +221,7 @@ [ "MULTIQC", "multiqc", - "1.34" + "1.33" ] ] } @@ -314,7 +314,7 @@ [ "MULTIQC", "multiqc", - "1.34" + "1.33" ] ] } @@ -408,7 +408,7 @@ [ "MULTIQC", "multiqc", - "1.34" + "1.33" ] ] } diff --git a/modules/nf-core/opt/flip/main.nf b/modules/nf-core/opt/flip/main.nf new file mode 100644 index 00000000..66be07d0 --- /dev/null +++ b/modules/nf-core/opt/flip/main.nf @@ -0,0 +1,60 @@ +process OPT_FLIP { + tag "$meta.id" + label 'process_high' + + container "khersameesh24/opt:v0.0.1" + + input: + tuple val(meta), path(probes_fasta) + tuple val(meta2), path(ref_annot_gff), path(ref_annot_fa) + + output: + tuple val(meta), path("${prefix}/fwd_oriented.fa"), emit: fwd_oriented_fa 
+ path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "OPT_FLIP module does not support Conda. Please use Docker / Singularity / Podman instead." + } + + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + """ + opt \\ + -o ${prefix} \\ + -p ${task.cpus} \\ + flip \\ + -q ${probes_fasta} \\ + -a ${ref_annot_gff} \\ + -t ${ref_annot_fa} \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + opt: \$(opt --version) + END_VERSIONS + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "OPT_FLIP module does not support Conda. Please use Docker / Singularity / Podman instead." + } + + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p ${prefix} + touch "${prefix}/fwd_oriented.fa" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + opt: \$(opt --version) + END_VERSIONS + """ +} diff --git a/modules/nf-core/opt/flip/meta.yml b/modules/nf-core/opt/flip/meta.yml new file mode 100644 index 00000000..f0e4c57c --- /dev/null +++ b/modules/nf-core/opt/flip/meta.yml @@ -0,0 +1,68 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "opt_flip" +description: "flip corrects probes that are aligning to the opposite strand of their intended target genes by reverse complementing them" +keywords: + - opt + - opt flip + - transcripts + - off-target probes + - align probes +tools: + - "opt": + description: "opt is a simple program that aligns probe sequences to transcript sequences to detect potential off-target probe activity" + homepage: "https://github.com/JEFworks-Lab/off-target-probe-tracker" + documentation: 
"https://github.com/JEFworks-Lab/off-target-probe-tracker/blob/main/README.md" + tool_dev_url: "https://github.com/JEFworks-Lab/off-target-probe-tracker" + licence: [GPL-3.0 license] + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information of the probe panel sequences used for the xenium experiment + e.g. `[ id:'breast_cancer_probe_panel_sequences' ]` + - probes_fasta: + type: file + description: Fasta file for the probe sequences used in the xenium experiment + pattern: "*.fasta" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing the information of the genomic features and fasta files used as references + e.g. `[ id:'gencode_references' ]` + - ref_annot_gff: + type: file + description: Reference annotations in gff format + pattern: "*.gff" + ontologies: [] + - ref_annot_fa: + type: file + description: Reference annotations in fasta format + pattern: "*.fa" + ontologies: [] + +output: + fwd_oriented_fa: + - - meta: + type: map + description: | + Groovy Map containing information of the forward oriented fasta generated with the probes panel sequences 'opt flip' + e.g. 
`[ id:'breast_cancer_probe_panel_sequences' ]` + - "${meta.id}/fwd_oriented.fa": + type: file + description: The forward oriented fasta file + pattern: "*.fa" + ontologies: [] + versions: + - "versions.yml": + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: "http://edamontology.org/format_3750" # YAML + +authors: + - "@khersameesh24" +maintainers: + - "@khersameesh24" diff --git a/modules/nf-core/opt/flip/opt-flip.diff b/modules/nf-core/opt/flip/opt-flip.diff new file mode 100644 index 00000000..09c6098d --- /dev/null +++ b/modules/nf-core/opt/flip/opt-flip.diff @@ -0,0 +1,48 @@ +Changes in component 'nf-core/opt/flip' +Changes in 'opt/flip/main.nf': +--- modules/nf-core/opt/flip/main.nf ++++ modules/nf-core/opt/flip/main.nf +@@ -9,8 +9,8 @@ + tuple val(meta2), path(ref_annot_gff), path(ref_annot_fa) + + output: +- tuple val(meta), path("${meta.id}/fwd_oriented.fa"), emit: fwd_oriented_fa +- path "versions.yml" , emit: versions ++ tuple val(meta), path("${prefix}/fwd_oriented.fa"), emit: fwd_oriented_fa ++ path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when +@@ -20,9 +20,10 @@ + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "OPT_FLIP module does not support Conda. Please use Docker / Singularity / Podman instead." + } +- def args = task.ext.args ?: '' +- def prefix = task.ext.prefix ?: "${meta.id}" + ++ def args = task.ext.args ?: '' ++ prefix = task.ext.prefix ?: "${meta.id}" ++ + """ + opt \\ + -o ${prefix} \\ +@@ -40,8 +41,13 @@ + """ + + stub: +- def prefix = task.ext.prefix ?: "${meta.id}" ++ // Exit if running this module with -profile conda / -profile mamba ++ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { ++ error "OPT_FLIP module does not support Conda. Please use Docker / Singularity / Podman instead." 
++ } + ++ prefix = task.ext.prefix ?: "${meta.id}" ++ + """ + mkdir -p ${prefix} + touch "${prefix}/fwd_oriented.fa" + +'modules/nf-core/opt/flip/meta.yml' is unchanged +'modules/nf-core/opt/flip/tests/main.nf.test' is unchanged +'modules/nf-core/opt/flip/tests/main.nf.test.snap' is unchanged +************************************************************ diff --git a/modules/nf-core/opt/flip/tests/main.nf.test b/modules/nf-core/opt/flip/tests/main.nf.test new file mode 100644 index 00000000..77fd9ef4 --- /dev/null +++ b/modules/nf-core/opt/flip/tests/main.nf.test @@ -0,0 +1,61 @@ +nextflow_process { + + name "Test Process OPT_FLIP" + script "../main.nf" + process "OPT_FLIP" + + tag "modules" + tag "modules_nfcore" + tag "opt" + tag "opt/flip" + + test("testrun panel probe sequences") { + + when { + process { + """ + input[0] = [ + [ id:'test_run' ], + file('https://raw.githubusercontent.com/khersameesh24/test-datasets/opt/testdata/panel_probe_sequences.fasta', checkIfExists: true) + ] + input[1] = [ + [ id:'test_run' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("testrun panel probe sequences -stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test_run' ], + file('https://raw.githubusercontent.com/khersameesh24/test-datasets/opt/testdata/panel_probe_sequences.fasta', checkIfExists: true) + ] + input[1] = [ + [ id:'test_run' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) 
+ } + } +} diff --git a/modules/nf-core/opt/flip/tests/main.nf.test.snap b/modules/nf-core/opt/flip/tests/main.nf.test.snap new file mode 100644 index 00000000..4bc5b35e --- /dev/null +++ b/modules/nf-core/opt/flip/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "testrun panel probe sequences -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_run" + }, + "fwd_oriented.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,57dbc672cf0bf40854c18d241f2d7d2e" + ], + "fwd_oriented_fa": [ + [ + { + "id": "test_run" + }, + "fwd_oriented.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,57dbc672cf0bf40854c18d241f2d7d2e" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-14T12:43:08.105988801" + }, + "testrun panel probe sequences": { + "content": [ + { + "0": [ + [ + { + "id": "test_run" + }, + "fwd_oriented.fa:md5,535289c04851ad94e091ec7c14ff6bcd" + ] + ], + "1": [ + "versions.yml:md5,57dbc672cf0bf40854c18d241f2d7d2e" + ], + "fwd_oriented_fa": [ + [ + { + "id": "test_run" + }, + "fwd_oriented.fa:md5,535289c04851ad94e091ec7c14ff6bcd" + ] + ], + "versions": [ + "versions.yml:md5,57dbc672cf0bf40854c18d241f2d7d2e" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-14T12:47:41.966183182" + } +} \ No newline at end of file diff --git a/modules/nf-core/opt/stat/main.nf b/modules/nf-core/opt/stat/main.nf new file mode 100644 index 00000000..e8de5860 --- /dev/null +++ b/modules/nf-core/opt/stat/main.nf @@ -0,0 +1,60 @@ +process OPT_STAT { + tag "$meta.id" + label 'process_high' + + container "khersameesh24/opt:v0.0.1" + + input: + tuple val(meta), path(probe_targets) + tuple val(meta2), path(fwd_oriented_probes) + path(gene_synonyms) + + output: + tuple val(meta), path("${prefix}/collapsed_summary.tsv"), emit: summary + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: 
+ // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "OPT_STAT module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + def synonyms = gene_synonyms ? "-s ${gene_synonyms}": "" + prefix = task.ext.prefix ?: "${meta.id}" + + """ + opt \\ + -o ${prefix} \\ + stat \\ + -i ${probe_targets} \\ + -q ${fwd_oriented_probes} \\ + ${synonyms} \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + opt: \$(opt --version) + END_VERSIONS + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "OPT_STAT module does not support Conda. Please use Docker / Singularity / Podman instead." + } + + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p ${prefix} + touch "${prefix}/collapsed_summary.tsv" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + opt: \$(opt --version) + END_VERSIONS + """ +} diff --git a/modules/nf-core/opt/stat/meta.yml b/modules/nf-core/opt/stat/meta.yml new file mode 100644 index 00000000..ae2d712b --- /dev/null +++ b/modules/nf-core/opt/stat/meta.yml @@ -0,0 +1,72 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "opt_stat" +description: "stat summarizes opt binding predictions" +keywords: + - opt + - opt stat + - transcripts + - binding predictions + - off-target probes + - align probes + - summary stats +tools: + - "opt": + description: "opt is a simple program that aligns probe sequences to transcript + sequences to detect potential off-target probe activity" + homepage: "https://github.com/JEFworks-Lab/off-target-probe-tracker" + documentation: "https://github.com/JEFworks-Lab/off-target-probe-tracker/blob/main/README.md" + tool_dev_url: 
"https://github.com/JEFworks-Lab/off-target-probe-tracker" + licence: [GPL-3.0 license] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing information of the probe targets generated from the panel sequences with `opt track` + e.g. `[ id:'breast_cancer_probe_panel_sequences' ]` + - probe_targets: + type: file + description: Generated probe targets + pattern: "*.tsv" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing information of the forward oriented fasta generated with the probes panel sequences 'opt flip' + e.g. `[ id:'breast_cancer_probe_panel_sequences' ]` + - fwd_oriented_probes: + type: file + description: The forward oriented fasta file + pattern: "*.fa" + ontologies: [] + - gene_synonyms: + type: file + description: Gene synonyms that may have been counted as off-targets but + simply differ in name (optional input) + pattern: "*.csv" + ontologies: [] +output: + summary: + - - meta: + type: map + description: | + Groovy Map containing summary of the forward oriented probes generated with the panel sequences 'opt flip and track' + e.g. 
`[ id:'breast_cancer_probe_panel_sequences' ]` + - "${meta.id}/collapsed_summary.tsv": + type: file + description: tsv file containing the summary stats + pattern: "*.tsv" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: "http://edamontology.org/format_3750" # YAML + +authors: + - "@khersameesh24" +maintainers: + - "@khersameesh24" diff --git a/modules/nf-core/opt/stat/opt-stat.diff b/modules/nf-core/opt/stat/opt-stat.diff new file mode 100644 index 00000000..348e77b5 --- /dev/null +++ b/modules/nf-core/opt/stat/opt-stat.diff @@ -0,0 +1,44 @@ +Changes in component 'nf-core/opt/stat' +Changes in 'opt/stat/main.nf': +--- modules/nf-core/opt/stat/main.nf ++++ modules/nf-core/opt/stat/main.nf +@@ -10,8 +10,8 @@ + path(gene_synonyms) + + output: +- tuple val(meta), path("${meta.id}/collapsed_summary.tsv"), emit: summary +- path "versions.yml" , emit: versions ++ tuple val(meta), path("${prefix}/collapsed_summary.tsv"), emit: summary ++ path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when +@@ -22,8 +22,8 @@ + error "OPT_STAT module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' +- def prefix = task.ext.prefix ?: "${meta.id}" + def synonyms = gene_synonyms ? "-s ${gene_synonyms}": "" ++ prefix = task.ext.prefix ?: "${meta.id}" + + """ + opt \\ +@@ -41,7 +41,12 @@ + """ + + stub: +- def prefix = task.ext.prefix ?: "${meta.id}" ++ // Exit if running this module with -profile conda / -profile mamba ++ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { ++ error "OPT_STAT module does not support Conda. Please use Docker / Singularity / Podman instead." 
++ } ++ ++ prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p ${prefix} + +'modules/nf-core/opt/stat/meta.yml' is unchanged +'modules/nf-core/opt/stat/tests/main.nf.test' is unchanged +'modules/nf-core/opt/stat/tests/main.nf.test.snap' is unchanged +************************************************************ diff --git a/modules/nf-core/opt/stat/tests/main.nf.test b/modules/nf-core/opt/stat/tests/main.nf.test new file mode 100644 index 00000000..5204a81f --- /dev/null +++ b/modules/nf-core/opt/stat/tests/main.nf.test @@ -0,0 +1,63 @@ +nextflow_process { + + name "Test Process OPT_STAT" + script "../main.nf" + process "OPT_STAT" + + tag "modules" + tag "modules_nfcore" + tag "opt" + tag "opt/stat" + + test("testrun panel probe sequences") { + + when { + process { + """ + input[0] = [ + [ id:'test_run' ], + file('https://raw.githubusercontent.com/khersameesh24/test-datasets/opt/testdata/probe2targets.tsv', checkIfExists: true) + ] + input[1] = [ + [ id:'test_run' ], + file('https://raw.githubusercontent.com/khersameesh24/test-datasets/opt/testdata/fwd_oriented.fa', checkIfExists: true) + ] + input[2] = [] + // input[2] = [file('https://raw.githubusercontent.com/khersameesh24/test-datasets/opt/testdata/gene_synonyms.csv', checkIfExists: true)] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("testrun panel probe sequences -stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test_run' ], + file('https://raw.githubusercontent.com/khersameesh24/test-datasets/opt/testdata/probe2targets.tsv', checkIfExists: true) + ] + input[1] = [ + [ id:'test_run' ], + file('https://raw.githubusercontent.com/khersameesh24/test-datasets/opt/testdata/fwd_oriented.fa', checkIfExists: true) + ] + input[2] = [] + // input[2] = [file('https://raw.githubusercontent.com/khersameesh24/test-datasets/opt/testdata/gene_synonyms.csv', checkIfExists: true)] + """ + } + } + then { + 
assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/opt/stat/tests/main.nf.test.snap b/modules/nf-core/opt/stat/tests/main.nf.test.snap new file mode 100644 index 00000000..b14671fe --- /dev/null +++ b/modules/nf-core/opt/stat/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "testrun panel probe sequences -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_run" + }, + "collapsed_summary.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,a23b08ea3b2da18863a5611dd0adbaa1" + ], + "summary": [ + [ + { + "id": "test_run" + }, + "collapsed_summary.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,a23b08ea3b2da18863a5611dd0adbaa1" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-14T12:23:32.959930982" + }, + "testrun panel probe sequences": { + "content": [ + { + "0": [ + [ + { + "id": "test_run" + }, + "collapsed_summary.tsv:md5,b2884d9c8c89124d3cbbbc1223d81c99" + ] + ], + "1": [ + "versions.yml:md5,a23b08ea3b2da18863a5611dd0adbaa1" + ], + "summary": [ + [ + { + "id": "test_run" + }, + "collapsed_summary.tsv:md5,b2884d9c8c89124d3cbbbc1223d81c99" + ] + ], + "versions": [ + "versions.yml:md5,a23b08ea3b2da18863a5611dd0adbaa1" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-14T12:58:57.17786325" + } +} \ No newline at end of file diff --git a/modules/nf-core/opt/track/main.nf b/modules/nf-core/opt/track/main.nf new file mode 100644 index 00000000..ff92645e --- /dev/null +++ b/modules/nf-core/opt/track/main.nf @@ -0,0 +1,59 @@ +process OPT_TRACK { + tag "$meta.id" + label 'process_high' + + container "khersameesh24/opt:v0.0.1" + + input: + tuple val(meta), path(fwd_oriented_fa) + tuple val(meta2), path(ref_annot_gff), path(ref_annot_fa) + + output: + tuple val(meta), path("${prefix}/probe2targets.tsv"), emit: probes2target + path 
"versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "OPT_TRACK module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + """ + opt \\ + -o ${prefix} \\ + -p ${task.cpus} \\ + track \\ + -q ${fwd_oriented_fa} \\ + -a ${ref_annot_gff} \\ + -t ${ref_annot_fa} \\ + ${args} + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + opt: \$(opt --version) + END_VERSIONS + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "OPT_TRACK module does not support Conda. Please use Docker / Singularity / Podman instead." + } + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p ${prefix} + touch "${prefix}/probe2targets.tsv" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + opt: \$(opt --version) + END_VERSIONS + """ +} diff --git a/modules/nf-core/opt/track/meta.yml b/modules/nf-core/opt/track/meta.yml new file mode 100644 index 00000000..6024d773 --- /dev/null +++ b/modules/nf-core/opt/track/meta.yml @@ -0,0 +1,69 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "opt_track" +description: "track aligns query probe sequences to any target transcriptome" +keywords: + - opt + - opt track + - transcripts + - off-target probes + - align probes + - traget transcriptome +tools: + - "opt": + description: "opt is a simple program that aligns probe sequences to transcript sequences to detect potential off-target probe activity" + homepage: "https://github.com/JEFworks-Lab/off-target-probe-tracker" + documentation: 
"https://github.com/JEFworks-Lab/off-target-probe-tracker/blob/main/README.md" + tool_dev_url: "https://github.com/JEFworks-Lab/off-target-probe-tracker" + licence: [GPL-3.0 license] + +input: + - - meta: + type: map + description: | + Groovy Map containing information of the forward oriented fasta generated with the probes panel sequences generated with `opt flip` + e.g. `[ id:'breast_cancer_probe_panel_sequences' ]` + - fwd_oriented_fa: + type: file + description: Forward oriented fasta file generated by the opt flip command + pattern: "*.fa" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing the information of the genomic features and fasta files used as references + e.g. `[ id:'gencode_references' ]` + - ref_annot_gff: + type: file + description: Reference annotation in gff format + pattern: "*.gff" + ontologies: [] + - ref_annot_fa: + type: file + description: Reference annotation in fasta format + pattern: "*.fa" + ontologies: [] + +output: + probes2target: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - "${meta.id}/probe2targets.tsv": + type: file + description: TSV file containing the gene and transcript information to which each probe aligns + pattern: "*.tsv" + ontologies: [] + versions: + - "versions.yml": + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: "http://edamontology.org/format_3750" # YAML + +authors: + - "@khersameesh24" +maintainers: + - "@khersameesh24" diff --git a/modules/nf-core/opt/track/opt-track.diff b/modules/nf-core/opt/track/opt-track.diff new file mode 100644 index 00000000..6a0098ad --- /dev/null +++ b/modules/nf-core/opt/track/opt-track.diff @@ -0,0 +1,44 @@ +Changes in component 'nf-core/opt/track' +Changes in 'opt/track/main.nf': +--- modules/nf-core/opt/track/main.nf ++++ modules/nf-core/opt/track/main.nf +@@ -9,8 +9,8 @@ + tuple val(meta2), path(ref_annot_gff), path(ref_annot_fa) + + output: +- tuple val(meta), path("${meta.id}/probe2targets.tsv"), emit: probes2target +- path "versions.yml" , emit: versions ++ tuple val(meta), path("${prefix}/probe2targets.tsv"), emit: probes2target ++ path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when +@@ -21,7 +21,7 @@ + error "OPT_TRACK module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' +- def prefix = task.ext.prefix ?: "${meta.id}" ++ prefix = task.ext.prefix ?: "${meta.id}" + + """ + opt \\ +@@ -41,8 +41,12 @@ + """ + + stub: +- def prefix = task.ext.prefix ?: "${meta.id}" +- ++ // Exit if running this module with -profile conda / -profile mamba ++ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { ++ error "OPT_TRACK module does not support Conda. Please use Docker / Singularity / Podman instead." 
++ } ++ prefix = task.ext.prefix ?: "${meta.id}" ++ + """ + mkdir -p ${prefix} + touch "${prefix}/probe2targets.tsv" + +'modules/nf-core/opt/track/meta.yml' is unchanged +'modules/nf-core/opt/track/tests/main.nf.test' is unchanged +'modules/nf-core/opt/track/tests/main.nf.test.snap' is unchanged +************************************************************ diff --git a/modules/nf-core/opt/track/tests/main.nf.test b/modules/nf-core/opt/track/tests/main.nf.test new file mode 100644 index 00000000..4fbf9a19 --- /dev/null +++ b/modules/nf-core/opt/track/tests/main.nf.test @@ -0,0 +1,60 @@ +nextflow_process { + + name "Test Process OPT_TRACK" + script "../main.nf" + process "OPT_TRACK" + + tag "modules" + tag "modules_nfcore" + tag "opt" + tag "opt/track" + + test("testrun panel probe sequences") { + + when { + process { + """ + input[0] = [ + [ id:'test_run' ], file('https://raw.githubusercontent.com/khersameesh24/test-datasets/opt/testdata/fwd_oriented.fa', checkIfExists: true) + ] + input[1] = [ + [ id:'test_run' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("testrun panel probe sequences -stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test_run' ], file('https://raw.githubusercontent.com/khersameesh24/test-datasets/opt/testdata/fwd_oriented.fa', checkIfExists: true) + ] + + input[1] = [ + [ id:'test_run' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff 
--git a/modules/nf-core/opt/track/tests/main.nf.test.snap b/modules/nf-core/opt/track/tests/main.nf.test.snap new file mode 100644 index 00000000..3dda7a91 --- /dev/null +++ b/modules/nf-core/opt/track/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "testrun panel probe sequences -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_run" + }, + "probe2targets.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,daf660f286a817fa0eed7703a5f65706" + ], + "probes2target": [ + [ + { + "id": "test_run" + }, + "probe2targets.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,daf660f286a817fa0eed7703a5f65706" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-14T12:20:29.220245783" + }, + "testrun panel probe sequences": { + "content": [ + { + "0": [ + [ + { + "id": "test_run" + }, + "probe2targets.tsv:md5,e15465df3845d7a6acf64dd3be04391b" + ] + ], + "1": [ + "versions.yml:md5,daf660f286a817fa0eed7703a5f65706" + ], + "probes2target": [ + [ + { + "id": "test_run" + }, + "probe2targets.tsv:md5,e15465df3845d7a6acf64dd3be04391b" + ] + ], + "versions": [ + "versions.yml:md5,daf660f286a817fa0eed7703a5f65706" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-14T12:20:19.999359259" + } +} \ No newline at end of file diff --git a/modules/nf-core/stardist/environment.yml b/modules/nf-core/stardist/environment.yml new file mode 100644 index 00000000..ddba86ea --- /dev/null +++ b/modules/nf-core/stardist/environment.yml @@ -0,0 +1,26 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::python=3.12.12 + - pip: + - stardist==0.9.2 + - tifffile==2026.3.3 + - imagecodecs==2026.1.14 + # For conda GPU support, use tensorflow[and-cuda]==2.20.0 instead + # The followings are needed 
in seqera containers UI + - tensorflow==2.20.0 + - nvidia-cublas-cu12==12.9.1.4 + - nvidia-cuda-cupti-cu12==12.9.79 + - nvidia-cuda-nvcc-cu12==12.9.86 + - nvidia-cuda-nvrtc-cu12==12.9.86 + - nvidia-cuda-runtime-cu12==12.9.79 + - nvidia-cudnn-cu12==9.19.0.56 + - nvidia-cufft-cu12==11.4.1.4 + - nvidia-curand-cu12==10.3.10.19 + - nvidia-cusolver-cu12==11.7.5.82 + - nvidia-cusparse-cu12==12.5.10.65 + - nvidia-nccl-cu12==2.29.7 + - nvidia-nvjitlink-cu12==12.9.86 diff --git a/modules/nf-core/stardist/main.nf b/modules/nf-core/stardist/main.nf new file mode 100644 index 00000000..01f4638c --- /dev/null +++ b/modules/nf-core/stardist/main.nf @@ -0,0 +1,41 @@ +process STARDIST { + tag "$meta.id" + label 'process_high' + label 'process_gpu_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/d9/d964e0bef867bb2ff1a309c9c087d8d83ac734ce3aa315dd8311d4c1bfdafd8e/data' : + 'community.wave.seqera.io/library/python_pip_imagecodecs_nvidia-cublas-cu12_pruned:b668bcb6d531d350' }" + + input: + tuple val(meta), path(image) + tuple val(model_name), path(model_path) + + output: + tuple val(meta), path("*.stardist.tif"), emit: mask + tuple val("${task.process}"), val('stardist'), eval("pip show stardist | sed -n 's/^Version: //p'"), topic: versions, emit: versions_stardist + tuple val("${task.process}"), val('python'), eval("python --version | sed 's/Python //'"), topic: versions, emit: versions_python + tuple val("${task.process}"), val('tensorflow'), eval("pip show tensorflow | sed -n 's/^Version: //p'"), topic: versions, emit: versions_tensorflow + tuple val("${task.process}"), val('tifffile'), eval("pip show tifffile | sed -n 's/^Version: //p'"), topic: versions, emit: versions_tifffile + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def model_command = model_path ? 
"-m ${model_path}" : model_name ? "-m ${model_name}" : "" + """ + stardist-predict2d \\ + -i $image \\ + -o . \\ + $model_command \\ + $args + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.stardist.tif + """ +} diff --git a/modules/nf-core/stardist/meta.yml b/modules/nf-core/stardist/meta.yml new file mode 100644 index 00000000..a7ecb790 --- /dev/null +++ b/modules/nf-core/stardist/meta.yml @@ -0,0 +1,157 @@ +name: "stardist" +description: Cell and nuclear segmentation with star-convex shapes +keywords: + - stardist + - segmentation + - image + - gpu + - spatial-transcriptomics +tools: + - "stardist": + description: "Stardist is an cell segmentation tool developed in Python by Martin + Weigert and Uwe Schmidt" + homepage: "https://stardist.net/" + documentation: "https://stardist.net/faq/" + tool_dev_url: "https://github.com/stardist/stardist" + doi: "10.1109/ISBIC56247.2022.9854534" + licence: + - "BSD 3-Clause" + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - image: + type: file + description: Single channel nuclear image + pattern: "*.{tiff,tif}" + ontologies: [] + - - model_name: + type: string + description: | + Name of a pretrained StarDist model (e.g. '2D_versatile_fluo', + '2D_versatile_he'). Used when model_path is not provided. + Pass '' (empty string) if providing a custom model path or + passing -m via ext.args. + - model_path: + type: file + description: | + Optional path to a custom StarDist model directory. When provided, + takes precedence over model_name. Pass [] (empty list) to use a + pretrained model name instead. + pattern: "*" + ontologies: [] +output: + mask: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.stardist.tif": + type: file + description: labelled mask output from stardist in tif format. 
+ pattern: "*.{tiff,tif}" + ontologies: [] + versions_stardist: + - - ${task.process}: + type: string + description: The name of the process + - stardist: + type: string + description: The name of the tool + - "pip show stardist | sed -n 's/^Version: //p'": + type: eval + description: The expression to obtain the version of the tool + versions_python: + - - ${task.process}: + type: string + description: The name of the process + - python: + type: string + description: The name of the tool + - python --version | sed 's/Python //': + type: eval + description: The expression to obtain the version of the tool + versions_tensorflow: + - - ${task.process}: + type: string + description: The name of the process + - tensorflow: + type: string + description: The name of the tool + - "pip show tensorflow | sed -n 's/^Version: //p'": + type: eval + description: The expression to obtain the version of the tool + versions_tifffile: + - - ${task.process}: + type: string + description: The name of the process + - tifffile: + type: string + description: The name of the tool + - "pip show tifffile | sed -n 's/^Version: //p'": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - stardist: + type: string + description: The name of the tool + - "pip show stardist | sed -n 's/^Version: //p'": + type: eval + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: The name of the process + - python: + type: string + description: The name of the tool + - python --version | sed 's/Python //': + type: eval + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: The name of the process + - tensorflow: + type: string + description: The name of the tool + - "pip show tensorflow | sed -n 's/^Version: //p'": + type: eval + description: The expression to 
obtain the version of the tool + - - ${task.process}: + type: string + description: The name of the process + - tifffile: + type: string + description: The name of the tool + - "pip show tifffile | sed -n 's/^Version: //p'": + type: eval + description: The expression to obtain the version of the tool +authors: + - "@migueLib" + - "@dongzehe" +maintainers: + - "@migueLib" +notes: | + GPU support: The container (built via Seqera Containers) includes TensorFlow 2.20.0 + with CUDA support and falls back to CPU automatically when no GPU is available. + Use the `process_gpu` label to request GPU resources from your executor. + When running with conda/mamba, GPU support depends on having a CUDA-enabled + TensorFlow installation in your environment. + + Model selection via the model input channel [model_name, model_path]: + - Pretrained model: [ '2D_versatile_fluo', [] ] + - Custom model directory: [ '', file("/path/to/model") ] + - Via ext.args only: [ '', [] ] (then set ext.args = '-m ...') + + Additional stardist CLI arguments can be passed via `task.ext.args`: + ext.args = '--n_tiles 4,4 --prob_thresh 0.5 --nms_thresh 0.3' + + Model weights are not bundled in the container. StarDist downloads pretrained + models on first use to ~/.keras/models/. diff --git a/modules/nf-core/stardist/stardist.diff b/modules/nf-core/stardist/stardist.diff new file mode 100644 index 00000000..aa1a361b --- /dev/null +++ b/modules/nf-core/stardist/stardist.diff @@ -0,0 +1,13 @@ +Changes in module 'nf-core/stardist' +--- modules/nf-core/stardist/main.nf ++++ modules/nf-core/stardist/main.nf +@@ -1,7 +1,7 @@ + process STARDIST { + tag "$meta.id" +- label 'process_medium' +- label 'process_gpu' ++ label 'process_high' ++ label 'process_gpu_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
diff --git a/modules/nf-core/stardist/tests/main.nf.test b/modules/nf-core/stardist/tests/main.nf.test new file mode 100644 index 00000000..5b3e2089 --- /dev/null +++ b/modules/nf-core/stardist/tests/main.nf.test @@ -0,0 +1,59 @@ +nextflow_process { + name "Test Process STARDIST" + script "../main.nf" + process "STARDIST" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "stardist" + + test("stardist2d - tif") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'imaging/segmentation/nuclear_image.tif', checkIfExists: true) + ] + input[1] = [ '2D_versatile_fluo', [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("stardist2d - tif - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'imaging/segmentation/nuclear_image.tif', checkIfExists: true) + ] + input[1] = [ '2D_versatile_fluo', [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/stardist/tests/main.nf.test.snap b/modules/nf-core/stardist/tests/main.nf.test.snap new file mode 100644 index 00000000..acbbc472 --- /dev/null +++ b/modules/nf-core/stardist/tests/main.nf.test.snap @@ -0,0 +1,168 @@ +{ + "stardist2d - tif - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.stardist.tif:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "STARDIST", + "stardist", + "0.9.2" + ] + ], + "2": [ + [ + "STARDIST", + "python", + "3.12.12" + ] + ], + "3": [ + [ + "STARDIST", + "tensorflow", + "2.20.0" + ] + ], + "4": [ + [ + "STARDIST", + "tifffile", + "2026.3.3" + ] + ], + "mask": [ + [ + { + "id": "test" + }, + "test.stardist.tif:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_python": [ + [ + 
"STARDIST", + "python", + "3.12.12" + ] + ], + "versions_stardist": [ + [ + "STARDIST", + "stardist", + "0.9.2" + ] + ], + "versions_tensorflow": [ + [ + "STARDIST", + "tensorflow", + "2.20.0" + ] + ], + "versions_tifffile": [ + [ + "STARDIST", + "tifffile", + "2026.3.3" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-03-04T22:33:02.205105908" + }, + "stardist2d - tif": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "nuclear_image.stardist.tif:md5,ca29a05346d9e0d4c15bb133122560ce" + ] + ], + "1": [ + [ + "STARDIST", + "stardist", + "0.9.2" + ] + ], + "2": [ + [ + "STARDIST", + "python", + "3.12.12" + ] + ], + "3": [ + [ + "STARDIST", + "tensorflow", + "2.20.0" + ] + ], + "4": [ + [ + "STARDIST", + "tifffile", + "2026.3.3" + ] + ], + "mask": [ + [ + { + "id": "test" + }, + "nuclear_image.stardist.tif:md5,ca29a05346d9e0d4c15bb133122560ce" + ] + ], + "versions_python": [ + [ + "STARDIST", + "python", + "3.12.12" + ] + ], + "versions_stardist": [ + [ + "STARDIST", + "stardist", + "0.9.2" + ] + ], + "versions_tensorflow": [ + [ + "STARDIST", + "tensorflow", + "2.20.0" + ] + ], + "versions_tifffile": [ + [ + "STARDIST", + "tifffile", + "2026.3.3" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-03-06T15:21:31.45100204" + } +} diff --git a/modules/nf-core/stardist/tests/nextflow.config b/modules/nf-core/stardist/tests/nextflow.config new file mode 100644 index 00000000..5926218d --- /dev/null +++ b/modules/nf-core/stardist/tests/nextflow.config @@ -0,0 +1,3 @@ +docker { + fixOwnership = true +} diff --git a/modules/nf-core/untar/environment.yml b/modules/nf-core/untar/environment.yml new file mode 100644 index 00000000..9b926b1f --- /dev/null +++ b/modules/nf-core/untar/environment.yml @@ -0,0 +1,12 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + 
- bioconda +dependencies: + - conda-forge::coreutils=9.5 + - conda-forge::grep=3.11 + - conda-forge::gzip=1.13 + - conda-forge::lbzip2=2.5 + - conda-forge::sed=4.8 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf new file mode 100644 index 00000000..dc9a056f --- /dev/null +++ b/modules/nf-core/untar/main.nf @@ -0,0 +1,93 @@ +process UNTAR { + tag "${archive}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/52/52ccce28d2ab928ab862e25aae26314d69c8e38bd41ca9431c67ef05221348aa/data' + : 'community.wave.seqera.io/library/coreutils_grep_gzip_lbzip2_pruned:838ba80435a629f8'}" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("${prefix}"), emit: untar + tuple val("${task.process}"), val('untar'), eval('tar --version 2>&1 | head -1 | sed "s/tar (GNU tar) //; s/ Copyright.*//"'), emit: versions_untar, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: (meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "")) + + """ + mkdir ${prefix} + + ## Ensures --strip-components only applied when top level of tar contents is a directory + ## If just files or multiple directories, place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + tar \\ + -C ${prefix} --strip-components 1 \\ + -xavf \\ + ${args} \\ + ${archive} \\ + ${args2} + else + tar \\ + -C ${prefix} \\ + -xavf \\ + ${args} \\ + ${archive} \\ + ${args2} + fi + + """ + + stub: + prefix = task.ext.prefix ?: (meta.id ? "${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) + """ + mkdir ${prefix} + ## Emit a valid empty file. 
nf-test's snapshot md5 path decompresses .gz files, + ## so a 0-byte .gz (from `touch`) throws EOF in GZIPInputStream and the snapshot + ## falls back to dumping non-deterministic File metadata (freeSpace, etc). + ## `: | gzip -n` produces a 20-byte deterministic empty gzip that md5s consistently. + emit_stub() { + local f="\$1" + if [[ "\$f" == *.gz ]]; then + : | gzip -n > "\$f" + else + touch "\$f" + fi + } + + ## Dry-run untaring the archive to get the files and place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + ## Single top-level dir in the archive: mirror `--strip-components 1` from the real + ## extraction so the stubbed file layout matches what a real run would produce. + for i in `tar -tf ${archive}`; + do + stripped=\${i#*/} + [ -z "\$stripped" ] && continue + if [[ \$(echo "\${i}" | grep -E "/\$") == "" ]]; + then + mkdir -p ${prefix}/\$(dirname "\$stripped") + emit_stub ${prefix}/\$stripped + else + mkdir -p ${prefix}/\$stripped + fi + done + else + for i in `tar -tf ${archive}`; + do + if [[ \$(echo "\${i}" | grep -E "/\$") == "" ]]; + then + emit_stub ${prefix}/\${i} + else + mkdir -p ${prefix}/\${i} + fi + done + fi + """ +} diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml new file mode 100644 index 00000000..571d8078 --- /dev/null +++ b/modules/nf-core/untar/meta.yml @@ -0,0 +1,73 @@ +name: untar +description: Extract files from tar, tar.gz, tar.bz2, tar.xz archives +keywords: + - untar + - uncompress + - extract +tools: + - untar: + description: | + Extract tar, tar.gz, tar.bz2, tar.xz files. + documentation: https://www.gnu.org/software/tar/manual/ + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - archive: + type: file + description: File to be untarred + pattern: "*.{tar,tar.gz,tar.bz2,tar.xz}" + ontologies: + - edam: http://edamontology.org/format_3981 # TAR format + - edam: http://edamontology.org/format_3989 # GZIP format +output: + untar: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*/" + - ${prefix}: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*/" + versions_untar: + - - ${task.process}: + type: string + description: The name of the process + - untar: + type: string + description: The name of the tool + - tar --version 2>&1 | head -1 | sed "s/tar (GNU tar) //; s/ Copyright.*//": + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - untar: + type: string + description: The name of the tool + - tar --version 2>&1 | head -1 | sed "s/tar (GNU tar) //; s/ Copyright.*//": + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" diff --git a/modules/nf-core/untar/tests/main.nf.test b/modules/nf-core/untar/tests/main.nf.test new file mode 100644 index 00000000..c957517a --- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test @@ -0,0 +1,85 @@ +nextflow_process { + + name "Test Process UNTAR" + script "../main.nf" + process "UNTAR" + tag "modules" + tag "modules_nfcore" + tag "untar" + + test("test_untar") { + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert 
snapshot(process.out).match() }, + ) + } + } + + test("test_untar_onlyfiles") { + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } + + test("test_untar - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } + + test("test_untar_onlyfiles - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } +} diff --git a/modules/nf-core/untar/tests/main.nf.test.snap b/modules/nf-core/untar/tests/main.nf.test.snap new file mode 100644 index 00000000..ceb91b79 --- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test.snap @@ -0,0 +1,158 @@ +{ + "test_untar_onlyfiles": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hello.txt:md5,e59ff97941044f85df5297e1c302d260" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hello.txt:md5,e59ff97941044f85df5297e1c302d260" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:28.231047" + }, + "test_untar_onlyfiles - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hello.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + 
"hello.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:45.773103" + }, + "test_untar - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:36.777441" + }, + "test_untar": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:19.377674" + } +} \ No newline at end of file diff --git a/modules/nf-core/untar/untar.diff b/modules/nf-core/untar/untar.diff new file mode 100644 index 00000000..71fea68f --- /dev/null +++ b/modules/nf-core/untar/untar.diff @@ -0,0 +1,54 @@ +Changes in component 'nf-core/untar' +Upstream git_sha: 447f7bc0fa41dfc2400c8cad4c0291880dc060cf 
+'modules/nf-core/untar/meta.yml' is unchanged +Changes in 'untar/main.nf': +--- modules/nf-core/untar/main.nf ++++ modules/nf-core/untar/main.nf +@@ -49,15 +49,33 @@ + prefix = task.ext.prefix ?: (meta.id ? "${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) + """ + mkdir ${prefix} ++ ## Emit a valid empty file. nf-test's snapshot md5 path decompresses .gz files, ++ ## so a 0-byte .gz (from `touch`) throws EOF in GZIPInputStream and the snapshot ++ ## falls back to dumping non-deterministic File metadata (freeSpace, etc). ++ ## `: | gzip -n` produces a 20-byte deterministic empty gzip that md5s consistently. ++ emit_stub() { ++ local f="\$1" ++ if [[ "\$f" == *.gz ]]; then ++ : | gzip -n > "\$f" ++ else ++ touch "\$f" ++ fi ++ } ++ + ## Dry-run untaring the archive to get the files and place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then ++ ## Single top-level dir in the archive: mirror `--strip-components 1` from the real ++ ## extraction so the stubbed file layout matches what a real run would produce. 
+ for i in `tar -tf ${archive}`; + do ++ stripped=\${i#*/} ++ [ -z "\$stripped" ] && continue + if [[ \$(echo "\${i}" | grep -E "/\$") == "" ]]; + then +- touch \${i} ++ mkdir -p ${prefix}/\$(dirname "\$stripped") ++ emit_stub ${prefix}/\$stripped + else +- mkdir -p \${i} ++ mkdir -p ${prefix}/\$stripped + fi + done + else +@@ -65,7 +83,7 @@ + do + if [[ \$(echo "\${i}" | grep -E "/\$") == "" ]]; + then +- touch ${prefix}/\${i} ++ emit_stub ${prefix}/\${i} + else + mkdir -p ${prefix}/\${i} + fi + +'modules/nf-core/untar/environment.yml' is unchanged +************************************************************ diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/unzip/environment.yml similarity index 85% rename from modules/nf-core/fastqc/environment.yml rename to modules/nf-core/unzip/environment.yml index f9f54ee9..24615895 100644 --- a/modules/nf-core/fastqc/environment.yml +++ b/modules/nf-core/unzip/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::fastqc=0.12.1 + - conda-forge::p7zip=16.02 diff --git a/modules/nf-core/unzip/main.nf b/modules/nf-core/unzip/main.nf new file mode 100644 index 00000000..d9417fb3 --- /dev/null +++ b/modules/nf-core/unzip/main.nf @@ -0,0 +1,50 @@ +process UNZIP { + tag "$archive" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/p7zip:16.02' : + 'biocontainers/p7zip:16.02' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("${prefix}/"), emit: unzipped_archive + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + if ( archive instanceof List && archive.name.size > 1 ) { error "[UNZIP] error: 7za only accepts a single archive as input. Please check module input." 
} + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.baseName) + """ + 7za \\ + x \\ + -o"${prefix}"/ \\ + $args \\ + $archive + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + 7za: \$(echo \$(7za --help) | sed 's/.*p7zip Version //; s/(.*//') + END_VERSIONS + """ + + stub: + if ( archive instanceof List && archive.name.size > 1 ) { error "[UNZIP] error: 7za only accepts a single archive as input. Please check module input." } + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.baseName) + """ + mkdir -p "${prefix}/morphology_focus" + touch "${prefix}/transcripts.parquet" + touch "${prefix}/morphology_focus/morphology_focus_0000.ome.tif" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + 7za: \$(echo \$(7za --help) | sed 's/.*p7zip Version //; s/(.*//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/unzip/meta.yml b/modules/nf-core/unzip/meta.yml new file mode 100644 index 00000000..ba1eb912 --- /dev/null +++ b/modules/nf-core/unzip/meta.yml @@ -0,0 +1,50 @@ +name: unzip +description: Unzip ZIP archive files +keywords: + - unzip + - decompression + - zip + - archiving +tools: + - unzip: + description: p7zip is a quick port of 7z.exe and 7za.exe (command line version + of 7zip, see www.7-zip.org) for Unix. + homepage: https://sourceforge.net/projects/p7zip/ + documentation: https://sourceforge.net/projects/p7zip/ + tool_dev_url: https://sourceforge.net/projects/p7zip" + licence: ["LGPL-2.1-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: ZIP file + pattern: "*.zip" + ontologies: + - edam: http://edamontology.org/format_3987 # ZIP format +output: + unzipped_archive: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}/: + type: directory + description: Directory contents of the unzipped archive + pattern: "${archive.baseName}/" + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/unzip/tests/main.nf.test b/modules/nf-core/unzip/tests/main.nf.test new file mode 100644 index 00000000..238b68d8 --- /dev/null +++ b/modules/nf-core/unzip/tests/main.nf.test @@ -0,0 +1,54 @@ +nextflow_process { + + name "Test Process UNZIP" + script "../main.nf" + process "UNZIP" + + tag "modules" + tag "modules_nfcore" + tag "unzip" + + test("generic [tar] [tar_gz]") { + + when { + process { + """ + input[0] = [ + [ id: 'hello' ], + file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("generic [tar] [tar_gz] stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id: 'hello' ], + file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/unzip/tests/main.nf.test.snap b/modules/nf-core/unzip/tests/main.nf.test.snap new file mode 100644 index 00000000..cdd2ab16 --- /dev/null +++ b/modules/nf-core/unzip/tests/main.nf.test.snap @@ -0,0 +1,76 @@ +{ + "generic [tar] [tar_gz] stub": { + "content": [ + { + "0": [ + [ + { + "id": "hello" + }, + [ + + ] + ] + ], + "1": [ + "versions.yml:md5,52c55ce814e8bc9edc5a6c625ed794b8" + ], + "unzipped_archive": [ + [ + { + "id": "hello" + }, + [ + + ] + ] + ], + "versions": [ + "versions.yml:md5,52c55ce814e8bc9edc5a6c625ed794b8" + ] + } + ], 
+ "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-30T19:16:37.11550986" + }, + "generic [tar] [tar_gz]": { + "content": [ + { + "0": [ + [ + { + "id": "hello" + }, + [ + "hello.tar:md5,80c66db79a773bc87b3346035ff9593e" + ] + ] + ], + "1": [ + "versions.yml:md5,52c55ce814e8bc9edc5a6c625ed794b8" + ], + "unzipped_archive": [ + [ + { + "id": "hello" + }, + [ + "hello.tar:md5,80c66db79a773bc87b3346035ff9593e" + ] + ] + ], + "versions": [ + "versions.yml:md5,52c55ce814e8bc9edc5a6c625ed794b8" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-30T19:16:25.120242571" + } +} \ No newline at end of file diff --git a/modules/nf-core/unzip/unzip.diff b/modules/nf-core/unzip/unzip.diff new file mode 100644 index 00000000..e4a293c0 --- /dev/null +++ b/modules/nf-core/unzip/unzip.diff @@ -0,0 +1,18 @@ +Changes in component 'nf-core/unzip' +'modules/nf-core/unzip/environment.yml' is unchanged +'modules/nf-core/unzip/meta.yml' is unchanged +Changes in 'unzip/main.nf': +--- modules/nf-core/unzip/main.nf ++++ modules/nf-core/unzip/main.nf +@@ -38,7 +38,9 @@ + if ( archive instanceof List && archive.name.size > 1 ) { error "[UNZIP] error: 7za only accepts a single archive as input. Please check module input." } + prefix = task.ext.prefix ?: ( meta.id ? 
"${meta.id}" : archive.baseName) + """ +- mkdir "${prefix}" ++ mkdir -p "${prefix}/morphology_focus" ++ touch "${prefix}/transcripts.parquet" ++ touch "${prefix}/morphology_focus/morphology_focus_0000.ome.tif" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": +************************************************************ diff --git a/modules/nf-core/xeniumranger/import-segmentation/main.nf b/modules/nf-core/xeniumranger/import-segmentation/main.nf new file mode 100644 index 00000000..264b8a72 --- /dev/null +++ b/modules/nf-core/xeniumranger/import-segmentation/main.nf @@ -0,0 +1,68 @@ +process XENIUMRANGER_IMPORT_SEGMENTATION { + tag "$meta.id" + label 'process_high' + + container "nf-core/xeniumranger:4.0" + + input: + tuple val(meta), path(xenium_bundle, stageAs: "bundle/"), path(transcript_assignment), path(viz_polygons), path(nuclei), path(cells), path(coordinate_transform), val(units) + + output: + tuple val(meta), path("${prefix}"), emit: outs + tuple val("${task.process}"), val("xeniumranger"), eval("xeniumranger -V | sed -e 's/.*xenium-//'"), emit: versions_xeniumranger, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "XENIUMRANGER_IMPORT_SEGMENTATION module does not support Conda. Please use Docker / Singularity / Podman instead." + } + + prefix = task.ext.prefix ?: "${meta.id}" + + // nuclei and cells are for image segmentation results + // transcript_assignment and viz_polygons are for transcript assignment results + // they are mutually exclusive + if ((nuclei || cells) && (transcript_assignment || viz_polygons)) { + error "--nuclei and --cells are for image segmentation results, which are mutually exclusive with --transcript-assignment and --viz-polygons for transcript assignment results. Please use only one of them." 
+ } + + def assembled_args = [] + if (task.ext.args) { assembled_args << task.ext.args.trim() } + if (nuclei) { assembled_args << "--nuclei=\"${nuclei}\"" } + if (cells) { assembled_args << "--cells=\"${cells}\"" } + if (transcript_assignment) { assembled_args << "--transcript-assignment=\"${transcript_assignment}\"" } + if (viz_polygons) { assembled_args << "--viz-polygons=\"${viz_polygons}\"" } + if (coordinate_transform) { + assembled_args << "--coordinate-transform=\"${coordinate_transform}\"" + // if coordinate_transform is provided, units must be microns + assembled_args << "--units=\"microns\"" + } else if (units) { + assembled_args << "--units=\"${units}\"" + } + + def args = assembled_args ? assembled_args.join(" \\\n ") : "" + + """ + xeniumranger import-segmentation \\ + --id="XENIUMRANGER_IMPORT_SEGMENTATION" \\ + --xenium-bundle="${xenium_bundle}" \\ + --localcores=${task.cpus} \\ + --localmem=${task.memory.toGiga()} \\ + ${args} + + rm -rf "${prefix}" + mv XENIUMRANGER_IMPORT_SEGMENTATION/outs "${prefix}" + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir -p "${prefix}" + touch "${prefix}/experiment.xenium" + """ + +} diff --git a/modules/nf-core/xeniumranger/import-segmentation/meta.yml b/modules/nf-core/xeniumranger/import-segmentation/meta.yml new file mode 100644 index 00000000..e222df58 --- /dev/null +++ b/modules/nf-core/xeniumranger/import-segmentation/meta.yml @@ -0,0 +1,141 @@ +name: xeniumranger_import_segmentation +description: | + The xeniumranger import-segmentation module runs `xeniumranger import-segmentation` + to recompute Xenium Onboard Analysis outputs using external segmentation results. 
+ It supports two execution modes mirroring the Xenium Ranger CLI: an image-based + mode that accepts nuclei and/or cell masks (TIFF/NPY) or GeoJSON polygons together + with optional coordinate transforms and unit definitions, and a transcript-based + mode that ingests Baysor-style transcript assignment CSV files plus visualization + polygons. Use the image-based inputs when providing label masks or polygons, or + switch to the transcript-based inputs when supplying transcript-level assignments + so the appropriate command-line arguments are passed to Xenium Ranger. +keywords: + - spatial + - segmentation + - import segmentation + - nuclear segmentation + - cell segmentation + - xeniumranger + - imaging +tools: + - xeniumranger: + description: | + Xenium Ranger is a set of analysis pipelines that process Xenium In Situ Gene Expression data to relabel, resegment, or import new segmentation results from community-developed tools. Xenium Ranger provides flexible off-instrument reanalysis of Xenium In Situ data. Relabel transcripts, resegment cells with the latest 10x segmentation algorithms, or import your own segmentation data to assign transcripts to cells. + homepage: "https://www.10xgenomics.com/support/software/xenium-ranger/latest" + documentation: "https://www.10xgenomics.com/support/software/xenium-ranger/latest/getting-started" + tool_dev_url: "https://www.10xgenomics.com/support/software/xenium-ranger/latest/analysis" + licence: + - "10x Genomics EULA" + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing run information + e.g. [ id:'xenium_sample' ] + - xenium_bundle: + type: directory + description: Path to the Xenium output bundle generated by the Xenium + Onboard Analysis pipeline + - transcript_assignment: + type: file + optional: true + description: | + Transcript assignment CSV with cell assignment, such as from Baysor v0.6, (transcript-based mode). + Mutually exclusive with image-based inputs (`nuclei`, `cells`). 
Required when using + transcript-based mode. Passed to `--transcript-assignment`. + pattern: "*.csv" + ontologies: + - edam: http://edamontology.org/format_3752 # CSV + - viz_polygons: + type: file + optional: true + description: | + Cell boundary polygons (GeoJSON) for visualization, such as from Baysor v0.6 (transcript-based mode). + Mutually exclusive with image-based inputs (`nuclei`, `cells`). Required when using + `transcript_assignment`. Passed to `--viz-polygons`. + pattern: "*.{json,geojson}" + ontologies: + - edam: http://edamontology.org/format_3464 # JSON + - nuclei: + type: file + optional: true + description: | + Nucleus segmentation input as label mask (TIFF/NPY), polygons (GeoJSON), or Xenium Onboard + Analysis cells.zarr.zip (image-based mode). + Mutually exclusive with transcript-based inputs + (`transcript_assignment`, `viz_polygons`). Passed to `--nuclei`. + pattern: "*.{tif,tiff,npy,json,geojson,zarr.zip}" + ontologies: + - edam: http://edamontology.org/format_4003 # NumPy format + - edam: http://edamontology.org/format_3464 # JSON + - cells: + type: file + optional: true + description: | + Cell segmentation input as label mask (TIFF/NPY), polygons (GeoJSON), or Xenium Onboard + Analysis cells.zarr.zip (image-based mode). + Mutually exclusive with transcript-based inputs + (`transcript_assignment`, `viz_polygons`). Passed to `--cells`. + pattern: "*.{tif,tiff,npy,json,geojson,zarr.zip}" + ontologies: + - edam: http://edamontology.org/format_4003 # NumPy format + - edam: http://edamontology.org/format_3464 # JSON + - coordinate_transform: + type: file + optional: true + description: | + Image alignment file containing similarity transform matrix (e.g., `_imagealignment.csv` from + Xenium Explorer). Only used with image-based mode inputs (`nuclei`, `cells`). `units` will be automatically set to "microns". Passed to `--coordinate-transform`. 
+ pattern: "*.csv" + ontologies: + - edam: http://edamontology.org/format_3752 # CSV + - units: + type: string + optional: true + description: | + Units for segmentation results. Must be one of two options: "microns" (physical space) or + "pixels" (pixel space). Can be used with both image-based and transcript-based modes. + Default: "pixels". Must be "microns" if `coordinate_transform` is used. For Baysor v0.6 + inputs, must be "microns". Passed to `--units`. + enum: + - "microns" + - "pixels" +output: + outs: + - - meta: + type: map + description: Groovy Map containing sample information e.g. [ id:'test' ] + - ${prefix}: + type: directory + description: Directory containing the output xenium bundle of Xenium + Ranger + pattern: "${prefix}" + versions_xeniumranger: + - - ${task.process}: + type: string + description: The process the versions were collected from + - xeniumranger: + type: string + description: The tool name + - xeniumranger -V | sed -e 's/.*xenium-//': + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - xeniumranger: + type: string + description: The tool name + - xeniumranger -V | sed -e 's/.*xenium-//': + type: string + description: The command used to generate the version of the tool +authors: + - "@khersameesh24" + - "@dongzehe" +maintainers: + - "@khersameesh24" + - "@dongzehe" diff --git a/modules/nf-core/xeniumranger/import-segmentation/tests/main.nf.test b/modules/nf-core/xeniumranger/import-segmentation/tests/main.nf.test new file mode 100644 index 00000000..71198099 --- /dev/null +++ b/modules/nf-core/xeniumranger/import-segmentation/tests/main.nf.test @@ -0,0 +1,314 @@ +nextflow_process { + + name "Test Process XENIUMRANGER_IMPORT_SEGMENTATION" + script "../main.nf" + process "XENIUMRANGER_IMPORT_SEGMENTATION" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag 
"xeniumranger" + tag "xeniumranger/import-segmentation" + tag "unzip" + + setup { + run("UNZIP") { + script "modules/nf-core/unzip/main.nf" + process { + """ + input[0] = [[], file('https://raw.githubusercontent.com/nf-core/test-datasets/spatialaxe/Xenium_Prime_Mouse_Ileum_tiny_outs.zip', checkIfExists: true)] + """ + } + } + } + + test("xeniumranger import-segmentation nuclei npy") { + when { + process { + """ + input[0] = channel.of([ + [id: "test_xeniumranger_import-segmentation"], + ]).combine(UNZIP.out.unzipped_archive.map { it[1] }) + input[1] = 0 + input[2] = [] + input[3] = UNZIP.out.unzipped_archive.map { it[1] } + "/segmentations/nuclei.npy" + input[4] = [] + input[5] = [] + input[6] = [] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + process.out.outs.get(0).get(1).findAll { file(it).name !in [ + 'analysis_summary.html', + 'metrics_summary.csv', + 'cell_boundaries.csv.gz', + 'cell_boundaries.parquet', + 'nucleus_boundaries.csv.gz', + 'nucleus_boundaries.parquet', + 'cells.csv.gz', + 'cells.parquet', + 'cells.zarr.zip', + 'transcripts.parquet', + 'transcripts.zarr.zip', + 'clusters.csv', + 'differential_expression.csv', + 'components.csv', + 'projection.csv', + 'variance.csv', + 'analysis.zarr.zip', + 'experiment.xenium', + 'cell_feature_matrix.zarr.zip' + ]} + ).match() + }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'analysis_summary.html' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'metrics_summary.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cell_boundaries.csv.gz' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cell_boundaries.parquet' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'nucleus_boundaries.csv.gz' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 
'nucleus_boundaries.parquet' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cells.csv.gz' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cells.parquet' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cells.zarr.zip' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'transcripts.parquet' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'clusters.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'differential_expression.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'components.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'projection.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'variance.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'analysis.zarr.zip' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'experiment.xenium' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cell_feature_matrix.zarr.zip' }).exists() }, + ) + } + } + + + test("xeniumranger import-segmentation nuclei tif") { + when { + process { + """ + input[0] = channel.of([ + [id: "test_xeniumranger_import-segmentation"], + ]).combine(UNZIP.out.unzipped_archive.map { it[1] }) + input[1] = 0 + input[2] = [] + input[3] = UNZIP.out.unzipped_archive.map { it[1] } + "/segmentations/nuclei.npy" + input[4] = [] + input[5] = [] + input[6] = [] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + process.out.outs.get(0).get(1).findAll { file(it).name !in [ + 'analysis_summary.html', + 'metrics_summary.csv', + 'cell_boundaries.csv.gz', + 'cell_boundaries.parquet', + 
'nucleus_boundaries.csv.gz', + 'nucleus_boundaries.parquet', + 'cells.csv.gz', + 'cells.parquet', + 'cells.zarr.zip', + 'transcripts.parquet', + 'transcripts.zarr.zip', + 'clusters.csv', + 'differential_expression.csv', + 'components.csv', + 'projection.csv', + 'variance.csv', + 'analysis.zarr.zip', + 'experiment.xenium', + 'cell_feature_matrix.zarr.zip' + ]} + ).match() + }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'analysis_summary.html' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'metrics_summary.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cell_boundaries.csv.gz' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cell_boundaries.parquet' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'nucleus_boundaries.csv.gz' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'nucleus_boundaries.parquet' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cells.csv.gz' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cells.parquet' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cells.zarr.zip' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'transcripts.parquet' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'clusters.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'differential_expression.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'components.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'projection.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'variance.csv' }).exists() }, + { assert 
file(process.out.outs.get(0).get(1).find { file(it).name == 'analysis.zarr.zip' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'experiment.xenium' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cell_feature_matrix.zarr.zip' }).exists() }, + ) + } + } + + test("xeniumranger import-segmentation segmentation csv") { + when { + process { + """ + input[0] = channel.of([ + [id: "test_xeniumranger_import-segmentation"], + ]).combine(UNZIP.out.unzipped_archive.map { it[1] }) + input[1] = 0 + input[2] = UNZIP.out.unzipped_archive.map { it[1] } + "/segmentations/imagealignment.csv" + input[3] = [] + input[4] = [] + input[5] = UNZIP.out.unzipped_archive.map { it[1] } + "/segmentations/segmentation.csv" + input[6] = UNZIP.out.unzipped_archive.map { it[1] } + "/segmentations/segmentation_polygons.json" + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + process.out.outs.get(0).get(1).findAll { file(it).name !in [ + 'analysis_summary.html', + 'metrics_summary.csv', + 'cell_boundaries.csv.gz', + 'cell_boundaries.parquet', + 'nucleus_boundaries.csv.gz', + 'nucleus_boundaries.parquet', + 'cells.csv.gz', + 'cells.parquet', + 'cells.zarr.zip', + 'transcripts.parquet', + 'transcripts.zarr.zip', + 'clusters.csv', + 'differential_expression.csv', + 'components.csv', + 'projection.csv', + 'variance.csv', + 'analysis.zarr.zip', + 'experiment.xenium', + 'cell_feature_matrix.zarr.zip' + ]} + ).match() + }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'analysis_summary.html' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'metrics_summary.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cell_boundaries.csv.gz' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cell_boundaries.parquet' }).exists() }, + { assert 
file(process.out.outs.get(0).get(1).find { file(it).name == 'nucleus_boundaries.csv.gz' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'nucleus_boundaries.parquet' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cells.csv.gz' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cells.parquet' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cells.zarr.zip' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'transcripts.parquet' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'clusters.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'differential_expression.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'components.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'projection.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'variance.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'analysis.zarr.zip' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'experiment.xenium' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cell_feature_matrix.zarr.zip' }).exists() }, + ) + } + } + + test("xeniumranger import-segmentation") { + when { + process { + """ + input[0] = channel.of([ + [id: "test_xeniumranger_import-segmentation"], + ]).combine(UNZIP.out.unzipped_archive.map { it[1] }) + input[1] = 0 + input[2] = [] + input[3] = UNZIP.out.unzipped_archive.map { it[1] } + "/cells.zarr.zip" + input[4] = [] + input[5] = [] + input[6] = [] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + 
process.out.outs.get(0).get(1).findAll { file(it).name !in [ + 'analysis_summary.html', + 'metrics_summary.csv', + 'cell_boundaries.csv.gz', + 'cell_boundaries.parquet', + 'nucleus_boundaries.csv.gz', + 'nucleus_boundaries.parquet', + 'cells.csv.gz', + 'cells.parquet', + 'cells.zarr.zip', + 'transcripts.parquet', + 'transcripts.zarr.zip', + 'clusters.csv', + 'differential_expression.csv', + 'components.csv', + 'projection.csv', + 'variance.csv', + 'analysis.zarr.zip', + 'experiment.xenium', + 'cell_feature_matrix.zarr.zip' + ]} + ).match() + }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'analysis_summary.html' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'metrics_summary.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cell_boundaries.csv.gz' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cell_boundaries.parquet' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'nucleus_boundaries.csv.gz' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'nucleus_boundaries.parquet' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cells.csv.gz' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cells.parquet' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cells.zarr.zip' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'transcripts.parquet' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'clusters.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'differential_expression.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'components.csv' }).exists() }, + { assert 
file(process.out.outs.get(0).get(1).find { file(it).name == 'projection.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'variance.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'analysis.zarr.zip' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'experiment.xenium' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cell_feature_matrix.zarr.zip' }).exists() }, + ) + } + } + + test("xeniumranger import-segmentation stub") { + options "-stub" + when { + process { + """ + input[0] = channel.of([ + [id: "test_xeniumranger_import-segmentation"], + ]).combine(UNZIP.out.unzipped_archive.map { it[1] }) + input[1] = 0 + input[2] = [] + input[3] = UNZIP.out.unzipped_archive.map { it[1] } + "/cells.zarr.zip" + input[4] = [] + input[5] = [] + input[6] = [] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/xeniumranger/import-segmentation/tests/main.nf.test.snap b/modules/nf-core/xeniumranger/import-segmentation/tests/main.nf.test.snap new file mode 100644 index 00000000..1c312ae0 --- /dev/null +++ b/modules/nf-core/xeniumranger/import-segmentation/tests/main.nf.test.snap @@ -0,0 +1,127 @@ +{ + "xeniumranger import-segmentation": { + "content": [ + [ + "versions.yml:md5,d76e870d71abf94ed9ae972a08b83f63" + ], + [ + "dispersion.csv:md5,e8b1abb880ece8fb730ce34a15f958b4", + "features_selected.csv:md5,c5e32d69f001f938ed316d2108a21e00", + "cell_feature_matrix.h5:md5,96cb400f1b1dd6f8796daea0ad5c74e6", + "barcodes.tsv.gz:md5,04ea06796d6b28517c288904ca043582", + "features.tsv.gz:md5,7862242129681900a9cc4086dc83b62e", + "matrix.mtx.gz:md5,489f86fbd8d65d6b973bb9cc7c5a76f1", + "gene_panel.json:md5,8890dd5fd90706e751554ac3fdfdedde", + "morphology.ome.tif:md5,6b65fff28a38a001b8f25061737fbf9b", + 
"morphology_focus_0000.ome.tif:md5,90e796ad634d14e62cf2ebcadf2eaf98" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T00:13:13.575888" + }, + "xeniumranger import-segmentation nuclei npy": { + "content": [ + [ + "versions.yml:md5,d76e870d71abf94ed9ae972a08b83f63" + ], + [ + "dispersion.csv:md5,e8b1abb880ece8fb730ce34a15f958b4", + "features_selected.csv:md5,c5e32d69f001f938ed316d2108a21e00", + "cell_feature_matrix.h5:md5,96cb400f1b1dd6f8796daea0ad5c74e6", + "barcodes.tsv.gz:md5,04ea06796d6b28517c288904ca043582", + "features.tsv.gz:md5,7862242129681900a9cc4086dc83b62e", + "matrix.mtx.gz:md5,489f86fbd8d65d6b973bb9cc7c5a76f1", + "gene_panel.json:md5,8890dd5fd90706e751554ac3fdfdedde", + "morphology.ome.tif:md5,6b65fff28a38a001b8f25061737fbf9b", + "morphology_focus_0000.ome.tif:md5,90e796ad634d14e62cf2ebcadf2eaf98" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-29T23:03:26.726334" + }, + "xeniumranger import-segmentation segmentation csv": { + "content": [ + [ + "versions.yml:md5,d76e870d71abf94ed9ae972a08b83f63" + ], + [ + "dispersion.csv:md5,e8b1abb880ece8fb730ce34a15f958b4", + "features_selected.csv:md5,c5e32d69f001f938ed316d2108a21e00", + "cell_feature_matrix.h5:md5,5d74ea595561e0300b6c3e5ec8d06fff", + "barcodes.tsv.gz:md5,97496a9b448d9380cff0575b8e7a6f57", + "features.tsv.gz:md5,7862242129681900a9cc4086dc83b62e", + "matrix.mtx.gz:md5,f93ed82a2a74c154392fc6237642f1d2", + "gene_panel.json:md5,8890dd5fd90706e751554ac3fdfdedde", + "morphology.ome.tif:md5,6b65fff28a38a001b8f25061737fbf9b", + "morphology_focus_0000.ome.tif:md5,90e796ad634d14e62cf2ebcadf2eaf98" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-29T23:22:58.158857" + }, + "xeniumranger import-segmentation stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_xeniumranger_import-segmentation" + }, + "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + 
"1": [ + "versions.yml:md5,d76e870d71abf94ed9ae972a08b83f63" + ], + "outs": [ + [ + { + "id": "test_xeniumranger_import-segmentation" + }, + "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,d76e870d71abf94ed9ae972a08b83f63" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T22:49:39.204133" + }, + "xeniumranger import-segmentation nuclei tif": { + "content": [ + [ + "versions.yml:md5,d76e870d71abf94ed9ae972a08b83f63" + ], + [ + "dispersion.csv:md5,e8b1abb880ece8fb730ce34a15f958b4", + "features_selected.csv:md5,c5e32d69f001f938ed316d2108a21e00", + "cell_feature_matrix.h5:md5,96cb400f1b1dd6f8796daea0ad5c74e6", + "barcodes.tsv.gz:md5,04ea06796d6b28517c288904ca043582", + "features.tsv.gz:md5,7862242129681900a9cc4086dc83b62e", + "matrix.mtx.gz:md5,489f86fbd8d65d6b973bb9cc7c5a76f1", + "gene_panel.json:md5,8890dd5fd90706e751554ac3fdfdedde", + "morphology.ome.tif:md5,6b65fff28a38a001b8f25061737fbf9b", + "morphology_focus_0000.ome.tif:md5,90e796ad634d14e62cf2ebcadf2eaf98" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-29T23:11:37.18721" + } +} \ No newline at end of file diff --git a/modules/nf-core/xeniumranger/import-segmentation/tests/nextflow.config b/modules/nf-core/xeniumranger/import-segmentation/tests/nextflow.config new file mode 100644 index 00000000..e69de29b diff --git a/modules/nf-core/xeniumranger/import-segmentation/tests/tags.yml b/modules/nf-core/xeniumranger/import-segmentation/tests/tags.yml new file mode 100644 index 00000000..90c2b805 --- /dev/null +++ b/modules/nf-core/xeniumranger/import-segmentation/tests/tags.yml @@ -0,0 +1,2 @@ +xeniumranger/import-segmentation: + - "modules/nf-core/xeniumranger/import-segmentation/**" diff --git a/modules/nf-core/xeniumranger/relabel/main.nf b/modules/nf-core/xeniumranger/relabel/main.nf new file mode 100644 index 00000000..bf04a971 --- /dev/null +++ 
b/modules/nf-core/xeniumranger/relabel/main.nf @@ -0,0 +1,47 @@ +process XENIUMRANGER_RELABEL { + tag "$meta.id" + label 'process_high' + + container "nf-core/xeniumranger:4.0" + + input: + tuple val(meta), path(xenium_bundle, stageAs: "bundle/"), path(panel) + + output: + tuple val(meta), path("${prefix}"), emit: outs + tuple val("${task.process}"), val("xeniumranger"), eval("xeniumranger -V | sed -e 's/.*xenium-//'"), emit: versions_xeniumranger, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "XENIUMRANGER_RELABEL module does not support Conda. Please use Docker / Singularity / Podman instead." + } + + prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: "" + + """ + xeniumranger relabel \\ + --id="XENIUMRANGER_RELABEL" \\ + --xenium-bundle="${xenium_bundle}" \\ + --panel="${panel}" \\ + --localcores=${task.cpus} \\ + --localmem=${task.memory.toGiga()} \\ + ${args} + + rm -rf "${prefix}" + mv XENIUMRANGER_RELABEL/outs "${prefix}" + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p "${prefix}" + touch "${prefix}/experiment.xenium" + """ +} diff --git a/modules/nf-core/xeniumranger/relabel/meta.yml b/modules/nf-core/xeniumranger/relabel/meta.yml new file mode 100644 index 00000000..8b61e0bb --- /dev/null +++ b/modules/nf-core/xeniumranger/relabel/meta.yml @@ -0,0 +1,72 @@ +name: xeniumranger_relabel +description: The xeniumranger relabel module allows you to change the gene + labels applied to decoded transcripts. +keywords: + - spatial + - relabel + - gene labels + - transcripts + - xeniumranger +tools: + - xeniumranger: + description: | + Xenium Ranger is a set of analysis pipelines that process Xenium In Situ Gene Expression data to relabel, resegment, or import new segmentation results from community-developed tools. 
Xenium Ranger provides flexible off-instrument reanalysis of Xenium In Situ data. Relabel transcripts, resegment cells with the latest 10x segmentation algorithms, or import your own segmentation data to assign transcripts to cells. + homepage: "https://www.10xgenomics.com/support/software/xenium-ranger/latest" + documentation: "https://www.10xgenomics.com/support/software/xenium-ranger/latest/getting-started" + tool_dev_url: "https://www.10xgenomics.com/support/software/xenium-ranger/latest/analysis" + licence: + - "10x Genomics EULA" + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing run information + e.g. [ id:'xenium_bundle_path' ] + - xenium_bundle: + type: directory + description: Path to the xenium output bundle generated by the Xenium + Onboard Analysis pipeline + - panel: + type: file + description: Path to the gene panel file + pattern: "*.json" + ontologies: + - edam: http://edamontology.org/format_3464 # JSON +output: + outs: + - - meta: + type: map + description: Groovy Map containing sample information e.g. 
[ id:'test' ] + - "${prefix}": + type: directory + description: Directory containing the output xenium bundle of Xenium + Ranger + pattern: "${prefix}" + versions_xeniumranger: + - - "${task.process}": + type: string + description: The process the versions were collected from + - xeniumranger: + type: string + description: The tool name + - "xeniumranger -V | sed -e 's/.*xenium-//'": + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - "${task.process}": + type: string + description: The process the versions were collected from + - xeniumranger: + type: string + description: The tool name + - "xeniumranger -V | sed -e 's/.*xenium-//'": + type: string + description: The command used to generate the version of the tool +authors: + - "@khersameesh24" + - "@dongzehe" +maintainers: + - "@khersameesh24" + - "@dongzehe" diff --git a/modules/nf-core/xeniumranger/relabel/tests/main.nf.test b/modules/nf-core/xeniumranger/relabel/tests/main.nf.test new file mode 100644 index 00000000..e00f1caa --- /dev/null +++ b/modules/nf-core/xeniumranger/relabel/tests/main.nf.test @@ -0,0 +1,93 @@ +nextflow_process { + + name "Test Process XENIUMRANGER_RELABEL" + script "../main.nf" + process "XENIUMRANGER_RELABEL" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "xeniumranger" + tag "xeniumranger/relabel" + tag "unzip" + + setup { + run("UNZIP") { + script "modules/nf-core/unzip/main.nf" + process { + """ + input[0] = [[], file('https://raw.githubusercontent.com/nf-core/test-datasets/spatialaxe/Xenium_Prime_Mouse_Ileum_tiny_outs.zip', checkIfExists: true)] + """ + } + } + } + + test("xeniumranger relabel") { + when { + process { + """ + input[0] = channel.of([ + [id: "test_xeniumranger_relabel"], + ]).combine(UNZIP.out.unzipped_archive.map { it[1] }) + input[1] = UNZIP.out.unzipped_archive.map { it[1] } + "/gene_panel.json" + """ + } + } + then { + assertAll( + { assert process.success }, + { assert 
process.out.outs != null }, + { + assert snapshot( + process.out.versions, + process.out.outs.get(0).get(1).findAll { file(it).name !in [ + "analysis.zarr.zip", + "experiment.xenium", + "transcripts.zarr.zip", + "analysis_summary.html", + "cell_feature_matrix.zarr.zip", + "differential_expression.csv", + "components.csv", + "projection.csv", + "variance.csv", + "metrics_summary.csv", + "clusters.csv" + ]} + ).match() + }, + { assert path(process.out.outs.get(0).get(1).find { file(it).name == 'analysis_summary.html' }).exists() }, + { assert path(process.out.outs.get(0).get(1).find { file(it).name == 'analysis.zarr.zip' }).exists() }, + { assert path(process.out.outs.get(0).get(1).find { file(it).name == 'experiment.xenium' }).exists() }, + { assert path(process.out.outs.get(0).get(1).find { file(it).name == 'transcripts.zarr.zip' }).exists() }, + { assert path(process.out.outs.get(0).get(1).find { file(it).name == 'analysis_summary.html' }).exists() }, + { assert path(process.out.outs.get(0).get(1).find { file(it).name == 'cell_feature_matrix.zarr.zip' }).exists() }, + { assert path(process.out.outs.get(0).get(1).find { file(it).name == 'differential_expression.csv' }).exists() }, + { assert path(process.out.outs.get(0).get(1).find { file(it).name == 'components.csv' }).exists() }, + { assert path(process.out.outs.get(0).get(1).find { file(it).name == 'projection.csv' }).exists() }, + { assert path(process.out.outs.get(0).get(1).find { file(it).name == 'variance.csv' }).exists() }, + { assert path(process.out.outs.get(0).get(1).find { file(it).name == 'metrics_summary.csv' }).exists() }, + { assert path(process.out.outs.get(0).get(1).find { file(it).name == 'clusters.csv' }).exists() }, + ) + } + } + + test("xeniumranger relabel stub") { + options "-stub" + when { + process { + """ + input[0] = channel.of([ + [id: "test_xeniumranger_relabel"], + ]).combine(UNZIP.out.unzipped_archive.map { it[1] }) + input[1] = UNZIP.out.unzipped_archive.map { it[1] } + 
"/gene_panel.json" + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/xeniumranger/relabel/tests/main.nf.test.snap b/modules/nf-core/xeniumranger/relabel/tests/main.nf.test.snap new file mode 100644 index 00000000..7c70cfc2 --- /dev/null +++ b/modules/nf-core/xeniumranger/relabel/tests/main.nf.test.snap @@ -0,0 +1,66 @@ +{ + "xeniumranger relabel": { + "content": [ + [ + "versions.yml:md5,ab2584177544560d5a9e9c36f7d24354" + ], + [ + "dispersion.csv:md5,e8b1abb880ece8fb730ce34a15f958b4", + "features_selected.csv:md5,c5e32d69f001f938ed316d2108a21e00", + "cell_boundaries.csv.gz:md5,8b4f2aa455a6fb14b2669a42db32ea7e", + "cell_boundaries.parquet:md5,e55d6a7fbec336103994baad8c8e4a9a", + "cell_feature_matrix.h5:md5,96cb400f1b1dd6f8796daea0ad5c74e6", + "barcodes.tsv.gz:md5,04ea06796d6b28517c288904ca043582", + "features.tsv.gz:md5,7862242129681900a9cc4086dc83b62e", + "matrix.mtx.gz:md5,489f86fbd8d65d6b973bb9cc7c5a76f1", + "cells.csv.gz:md5,3cef2d7cc8cfba1d47bdb7c65c3d5d5f", + "cells.parquet:md5,9b30b35ab961d2d243a1426e8dc980fe", + "cells.zarr.zip:md5,556e47d5b14150239b10b2f801defa2b", + "gene_panel.json:md5,8890dd5fd90706e751554ac3fdfdedde", + "morphology.ome.tif:md5,6b65fff28a38a001b8f25061737fbf9b", + "morphology_focus_0000.ome.tif:md5,90e796ad634d14e62cf2ebcadf2eaf98", + "nucleus_boundaries.csv.gz:md5,e417b6e293298870956d42c7106cbd0c", + "nucleus_boundaries.parquet:md5,bacbfc3c2e956d899e1d8ccba5dd7c5e", + "transcripts.parquet:md5,c0f40d5c61b87404bc9efb84ff0563a8" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-29T21:06:09.082129" + }, + "xeniumranger relabel stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_xeniumranger_relabel" + }, + "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,ab2584177544560d5a9e9c36f7d24354" + ], + "outs": [ + [ + { + 
"id": "test_xeniumranger_relabel" + }, + "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,ab2584177544560d5a9e9c36f7d24354" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-22T15:22:34.353444" + } +} \ No newline at end of file diff --git a/modules/nf-core/xeniumranger/relabel/tests/nextflow.config b/modules/nf-core/xeniumranger/relabel/tests/nextflow.config new file mode 100644 index 00000000..e69de29b diff --git a/modules/nf-core/xeniumranger/relabel/tests/tags.yml b/modules/nf-core/xeniumranger/relabel/tests/tags.yml new file mode 100644 index 00000000..1cb37a80 --- /dev/null +++ b/modules/nf-core/xeniumranger/relabel/tests/tags.yml @@ -0,0 +1,2 @@ +xeniumranger/relabel: + - "modules/nf-core/xeniumranger/relabel/**" diff --git a/modules/nf-core/xeniumranger/resegment/main.nf b/modules/nf-core/xeniumranger/resegment/main.nf new file mode 100644 index 00000000..d52eba0e --- /dev/null +++ b/modules/nf-core/xeniumranger/resegment/main.nf @@ -0,0 +1,45 @@ +process XENIUMRANGER_RESEGMENT { + tag "$meta.id" + label 'process_high' + + container "nf-core/xeniumranger:4.0" + + input: + tuple val(meta), path(xenium_bundle, stageAs: "bundle/") + + output: + tuple val(meta), path("${prefix}"), emit: outs + tuple val("${task.process}"), val("xeniumranger"), eval("xeniumranger -V | sed -e 's/.*xenium-//'"), emit: versions_xeniumranger, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "XENIUMRANGER_RESEGMENT module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + + prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: "" + + """ + xeniumranger resegment \\ + --id="XENIUMRANGER_RESEGMENT" \\ + --xenium-bundle="${xenium_bundle}" \\ + --localcores=${task.cpus} \\ + --localmem=${task.memory.toGiga()} \\ + ${args} + + rm -rf "${prefix}" + mv XENIUMRANGER_RESEGMENT/outs "${prefix}" + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir -p "${prefix}" + touch "${prefix}/experiment.xenium" + """ +} diff --git a/modules/nf-core/xeniumranger/resegment/meta.yml b/modules/nf-core/xeniumranger/resegment/meta.yml new file mode 100644 index 00000000..687c6723 --- /dev/null +++ b/modules/nf-core/xeniumranger/resegment/meta.yml @@ -0,0 +1,69 @@ +name: xeniumranger_resegment +description: The xeniumranger resegment module allows you to generate a new + segmentation of the morphology image space by rerunning the Xenium Onboard + Analysis (XOA) segmentation algorithms with modified parameters. +keywords: + - spatial + - resegment + - morphology + - segmentation + - xeniumranger +tools: + - xeniumranger: + description: | + Xenium Ranger is a set of analysis pipelines that process Xenium In Situ Gene Expression data to relabel, resegment, or import new segmentation results from community-developed tools. Xenium Ranger provides flexible off-instrument reanalysis of Xenium In Situ data. Relabel transcripts, resegment cells with the latest 10x segmentation algorithms, or import your own segmentation data to assign transcripts to cells. + homepage: "https://www.10xgenomics.com/support/software/xenium-ranger/latest" + documentation: "https://www.10xgenomics.com/support/software/xenium-ranger/latest/getting-started" + tool_dev_url: "https://www.10xgenomics.com/support/software/xenium-ranger/latest/analysis" + licence: + - "10x Genomics EULA" + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing run information + e.g. 
[ id:'xenium_experiment' ] + - xenium_bundle: + type: directory + description: Path to the xenium output bundle generated by the Xenium + Onboard Analysis pipeline +output: + outs: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "${prefix}": + type: directory + description: Directory containing the output xenium bundle of Xenium + Ranger + pattern: "${prefix}" + versions_xeniumranger: + - - "${task.process}": + type: string + description: The process the versions were collected from + - xeniumranger: + type: string + description: The tool name + - "xeniumranger -V | sed -e 's/.*xenium-//'": + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - "${task.process}": + type: string + description: The process the versions were collected from + - xeniumranger: + type: string + description: The tool name + - "xeniumranger -V | sed -e 's/.*xenium-//'": + type: string + description: The command used to generate the version of the tool +authors: + - "@khersameesh24" + - "@dongzehe" +maintainers: + - "@khersameesh24" + - "@dongzehe" diff --git a/modules/nf-core/xeniumranger/resegment/tests/main.nf.test b/modules/nf-core/xeniumranger/resegment/tests/main.nf.test new file mode 100644 index 00000000..241f084c --- /dev/null +++ b/modules/nf-core/xeniumranger/resegment/tests/main.nf.test @@ -0,0 +1,77 @@ +nextflow_process { + + name "Test Process XENIUMRANGER_RESEGMENT" + script "../main.nf" + process "XENIUMRANGER_RESEGMENT" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "xeniumranger" + tag "xeniumranger/resegment" + tag "unzip" + + setup { + run("UNZIP") { + script "modules/nf-core/unzip/main.nf" + process { + """ + input[0] = [[], file('https://raw.githubusercontent.com/nf-core/test-datasets/spatialaxe/Xenium_Prime_Mouse_Ileum_tiny_outs.zip', checkIfExists: true)] + """ + } + } + } + + test("xeniumranger resegment") { + when { + 
process { + """ + input[0] = channel.of([ + [id: "test_xeniumranger_resegment"], + ]).combine(UNZIP.out.unzipped_archive.map { it[1] }) + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert process.out.outs != null }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'analysis_summary.html' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cells.csv.gz' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cells.parquet' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cells.zarr.zip' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'transcripts.parquet' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'transcripts.zarr.zip' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'analysis.zarr.zip' }).exists() }, + { assert path(process.out.outs.get(0).get(1).find { file(it).name == 'cell_feature_matrix.zarr.zip' }).exists() } + ) + } + } + + test("xeniumranger resegment stub") { + options "-stub" + when { + process { + """ + input[0] = channel.of([ + [id: "test_xeniumranger_resegment"], + ]).combine(UNZIP.out.unzipped_archive.map { it[1] }) + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/xeniumranger/resegment/tests/main.nf.test.snap b/modules/nf-core/xeniumranger/resegment/tests/main.nf.test.snap new file mode 100644 index 00000000..16f94c14 --- /dev/null +++ b/modules/nf-core/xeniumranger/resegment/tests/main.nf.test.snap @@ -0,0 +1,35 @@ +{ + "xeniumranger resegment stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_xeniumranger_resegment" + }, 
+ "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,4671141281357e0ce26d9cb35fed23a8" + ], + "outs": [ + [ + { + "id": "test_xeniumranger_resegment" + }, + "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,4671141281357e0ce26d9cb35fed23a8" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T23:22:35.438329" + } +} \ No newline at end of file diff --git a/modules/nf-core/xeniumranger/resegment/tests/nextflow.config b/modules/nf-core/xeniumranger/resegment/tests/nextflow.config new file mode 100644 index 00000000..e69de29b diff --git a/modules/nf-core/xeniumranger/resegment/tests/tags.yml b/modules/nf-core/xeniumranger/resegment/tests/tags.yml new file mode 100644 index 00000000..99f47c82 --- /dev/null +++ b/modules/nf-core/xeniumranger/resegment/tests/tags.yml @@ -0,0 +1,2 @@ +xeniumranger/resegment: + - "modules/nf-core/xeniumranger/resegment/**" diff --git a/nextflow.config b/nextflow.config index e081c0c2..cf75ae06 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,6 +1,6 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - nf-core/spatialxe Nextflow config file + nf-core/spatialaxe Nextflow config file ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Default config options for all compute environments ---------------------------------------------------------------------------------------- @@ -9,14 +9,113 @@ // Global default params, used in configs params { - // TODO nf-core: Specify your pipeline's command line flags // Input options - input = null + input = null // path to the samplesheet.csv containing meta,bundle,image + outdir = null // path to generate pipeline results at + mode = null // please check nextflow_schema.json for the modes you can select + method = null // name of the method to run for image or coordinate or segfree 
approaches + gene_panel = null // path to gene panel json file if `relabel_genes` is true + qupath_polygons = null // Path to qupath segmentation results in GeoJSON format + alignment_csv = null // image alignment file format a 3x3 transformation matrix, where the last row is [0,0,1] + cellpose_model = null // custom cellpose model to use for running or starting training + stardist_model = '2D_versatile_fluo' // stardist pretrained model for cell segmentation + stardist_nuclei_model = '2D_versatile_fluo' // stardist pretrained model for nuclei segmentation + stardist_prob_thresh = null // stardist probability threshold + stardist_nms_thresh = null // stardist NMS threshold + stardist_n_tiles = "8 8" // tiling for large images (Xenium images are ~20K×25K) + segmentation_mask = null // prior segmentation mask + probes_fasta = null // Fasta file for the probe sequences used in the xenium experiment + reference_annotations = null // Path to the genomic features (.gff) and fasta (.fa) files used as reference annotations + gene_synonyms = null // Gene synonyms that may have been counted as off-targets but simply differ in name - // References - genome = null - igenomes_base = 's3://ngi-igenomes/igenomes/' - igenomes_ignore = false + // GPU flag (set to true by the gpu profile) + use_gpu = false + gpu_queue = null // AWS Batch queue for GPU tasks (e.g., SEGGER, ProSeg) + cellpose_queue = null // AWS Batch queue for Cellpose (single large GPU) + + // execution specific + sharpen_tiff = false // whether to sharpen the morphology-focus tiff + nucleus_segmentation_only = false // to only run nucleus segmentation while running segmentation methods & XR_IMP-SEG + cell_segmentation_only = true // to only run cell segmentation while running segmentation methods & XR_IMP-SEG + cellpose_downscale = false // pre-downscale morphology image to avoid cellpose OOM on large images + + // Xeniumranger specific + xeniumranger_only = false // to generate redefined bundle with just changing
the xr specific params + relabel_genes = false // whether to correct gene names with gene_panel.json + expansion_distance = 5 // default nuclear expansion distance in XOA v2.0 & later + dapi_filter = 100 // adjust the minimum peak intensity to use more nuclei + interior_stain = true // interior stain is enabled by default - false to disable + boundary_stain = true // boundary stain is enabled by default - false to disable + + // Segger specific + segmentation_refinement = false // whether to run segmentation refinement step (segger) + segger_accelerator = 'cpu' // either 'cuda' or 'cpu' + segger_knn_method = 'kd_tree' // 'cuda' - ensure your system has CUDA installed and configured properly + segger_num_workers = 4 // number of data-loader workers for segger + segger_model = null // path to a pre-trained segger model checkpoint + + // Proseg specific + format = 'xenium' // preset value set as `xenium` + + // Segmentation methods + image_seg_methods = ["cellpose", "xeniumranger", "baysor", "stardist"] + transcript_seg_methods = ["proseg", "segger", "baysor"] + segfree_methods = ["ficture", "baysor"] + + // Ficture specific + negative_control_regex = null + features = null + + // Baysor specific + filter_transcripts = false + min_qv = 20 + max_x = 24000.0 + min_x = 0.0 + max_y = 24000.0 + min_y = 0.0 + + // Generic tiling parameters (for proseg and other methods) + tiling = false // enable tiled segmentation (divide → parallel segmentation → stitch) + patch_grid = '3x3' // grid layout for tiling (rows x cols) + patch_overlap = 50 // overlap between patches in microns + patch_filter_method = null // post-stitch cell filtering: 'empirical', 'distribution', 'both', or null + patch_filter_iqr_multiplier = 3.0 // IQR multiplier for empirical cell size filtering + patch_filter_z_threshold = 4.0 // z-score threshold for distribution cell size filtering + + // Baysor-specific parameters + baysor_scale = 30 // Baysor --scale for non-tiled runs + baysor_config = null // path to
baysor config TOML (optional) + baysor_tiling = true // enable tiled Baysor (divide → per-patch Baysor → stitch) + baysor_tiling_micron = 1200 // tile width in microns for Baysor tiling + baysor_tiling_overlap = 200 // overlap between Baysor patches in microns + baysor_tiling_balanced = true // balance transcripts across tiles (merge sparse tiles) + baysor_tiling_scale = 39 // Baysor --scale for tiled runs (larger to compensate for EM on tiles) + baysor_tiling_min_mols_per_cell = 120 // --min-molecules-per-cell for tiled Baysor + baysor_tiling_min_transcripts_per_cell = 50 // post-stitch cell filtering threshold + + // Baysor prior segmentation + // null — no prior (random EM init) + // 'cells' — Xenium bundle cell_id column (column-based, works with tiling) + // 'cellpose' — run Cellpose cell mask as image prior (non-tiled only) + baysor_prior = null + baysor_prior_confidence = 0.2 // prior-segmentation-confidence [0-1] + + // Segger specific + tile_width = 120 + tile_height = 120 + batch_size_train = 4 // larger batch size can speed up training, but requires more memory + devices = 4 // Use multiple GPUs by increasing the devices parameter to further accelerate training + max_epochs = 200 // increasing #epochs can improve model performance with more learning cycles, but extends training time + batch_size_predict = 1 // larger batch size can speed up prediction, but requires more memory + cc_analysis = false // to control connected component analysis + + // qc specific + run_qc = true // whether to run the qc layer of pipeline + offtarget_probe_tracking = false // whether to run off-target probe tracking (provide probes_fasta, reference_annotations, gene_synonyms) + + // utility modules + csplit_x_bins = 2 // number of tiles along the x axis (total number of bins is product of x_bins * y_bins) + csplit_y_bins = 2 // number of tiles along the y axis // MultiQC options multiqc_config = null @@ -25,13 +124,18 @@ params { max_multiqc_email_size = '25.MB'
multiqc_methods_description = null + // pipeline dev and testing option + buffer_samples = false // process one sample at a time from the multi-sample samplesheet + buffer_size = 1 // buffer size 0 means no buffering of samples + // Boilerplate options - outdir = null publish_dir_mode = 'copy' email = null email_on_fail = null plaintext_email = false + monochromeLogs = false monochrome_logs = false + hook_url = System.getenv('HOOK_URL') help = false help_full = false show_hidden = false @@ -47,15 +151,13 @@ params { custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" config_profile_contact = null config_profile_url = null + test_data_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/spatialaxe' + // Schema validation default options validate_params = true } -// Backwards compatibility for publishDir syntax -outputDir = params.outdir -workflow.output.mode = params.publish_dir_mode - // Load base.config by default for all pipelines includeConfig 'conf/base.config' @@ -67,7 +169,7 @@ profiles { nextflow.enable.configProcessNamesValidation = true } conda { - conda.enabled = true + conda.enabled = false docker.enabled = false singularity.enabled = false podman.enabled = false @@ -77,8 +179,8 @@ profiles { apptainer.enabled = false } mamba { - conda.enabled = true - conda.useMamba = true + conda.enabled = false + conda.useMamba = false docker.enabled = false singularity.enabled = false podman.enabled = false @@ -87,32 +189,23 @@ profiles { apptainer.enabled = false } docker { - docker.enabled = true - conda.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - docker.runOptions = '-u $(id -u):$(id -g)' + docker.enabled = true + docker.fixOwnership = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + 
apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' } - arm64 { - process.arch = 'arm64' - // TODO https://github.com/nf-core/modules/issues/6694 - // For now if you're using arm64 you have to use wave for the sake of the maintainers - // wave profile - apptainer.ociAutoPull = true - singularity.ociAutoPull = true - wave.enabled = true - wave.freeze = true - wave.strategy = 'conda,container' - } - emulate_amd64 { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + arm { + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { singularity.enabled = true singularity.autoMounts = true + singularity.pullTimeout = '100 min' conda.enabled = false docker.enabled = false podman.enabled = false @@ -164,25 +257,44 @@ profiles { wave.freeze = true wave.strategy = 'conda,container' } + gitpod { + executor.name = 'local' + executor.cpus = 8 + executor.memory = 16.GB + } gpu { + params.use_gpu = true docker.runOptions = '-u $(id -u):$(id -g) --gpus all' apptainer.runOptions = '--nv' singularity.runOptions = '--nv' } + aws { + process { + withLabel:process_gpu { + // Must repeat base.config label properties — profile withLabel replaces, not merges + ext.use_gpu = { params.use_gpu } + accelerator = { params.use_gpu ? 1 : null } + containerOptions = { "--shm-size ${task.memory.toGiga()}g" } + queue = { params.gpu_queue ?: null } + } + withLabel:process_gpu_single { + ext.use_gpu = { params.use_gpu } + accelerator = { params.use_gpu ? 
1 : null } + containerOptions = { "--shm-size ${task.memory.toGiga()}g" } + queue = { params.cellpose_queue ?: params.gpu_queue ?: null } + } + } + } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } } -// Load nf-core custom profiles from different institutions -// If params.custom_config_base is set AND either the NXF_OFFLINE environment variable is not set or params.custom_config_base is a local path, the nfcore_custom.config file from the specified base path is included. -// Load nf-core/spatialxe custom profiles from different institutions. +// Load nf-core custom profiles from different institutions │ includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !params.custom_config_base.startsWith('http')) ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" - -// Load nf-core/spatialxe custom profiles from different institutions. -// TODO nf-core: Optionally, you can add a pipeline-specific nf-core config at https://github.com/nf-core/configs -// includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !params.custom_config_base.startsWith('http')) ? "${params.custom_config_base}/pipeline/spatialxe.config" : "/dev/null" +// Load nf-core/spatialaxe custom profiles from different institutions. +includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/pipeline/spatialaxe.config" : "/dev/null" // Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile // Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled @@ -193,18 +305,19 @@ podman.registry = 'quay.io' singularity.registry = 'quay.io' charliecloud.registry = 'quay.io' -// Load igenomes.config if required -includeConfig !params.igenomes_ignore ? 
'conf/igenomes.config' : 'conf/igenomes_ignored.config' - // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. env { + PYTORCH_CUDA_ALLOC_CONF = "expandable_segments:True" PYTHONNOUSERSITE = 1 R_PROFILE_USER = "/.Rprofile" R_ENVIRON_USER = "/.Renviron" JULIA_DEPOT_PATH = "/usr/local/share/julia" + MPLCONFIGDIR = "./tmp" + NUMBA_CACHE_DIR = "./tmp" + NUMBA_DISABLE_CACHE = 1 } // Set bash options @@ -219,6 +332,7 @@ process.shell = [ // Disable process selector warnings by default. Use debug profile to enable warnings. nextflow.enable.configProcessNamesValidation = false +nextflow.enable.moduleBinaries = true timeline { enabled = true @@ -229,8 +343,9 @@ report { file = "${params.outdir}/pipeline_info/execution_report_${params.trace_report_suffix}.html" } trace { - enabled = true - file = "${params.outdir}/pipeline_info/execution_trace_${params.trace_report_suffix}.txt" + enabled = true + overwrite = true + file = "${params.outdir}/pipeline_info/execution_trace_${params.trace_report_suffix}.txt" } dag { enabled = true @@ -238,32 +353,39 @@ dag { } manifest { - name = 'nf-core/spatialxe' + name = 'nf-core/spatialaxe' contributors = [ - // TODO nf-core: Update the field with the details of the contributors to your pipeline. 
New with Nextflow version 24.10.0 [ name: 'Sameesh Kher', - affiliation: '', - email: '', - github: '', - contribution: [], // List of contribution types ('author', 'maintainer' or 'contributor') - orcid: '' + affiliation: 'German Cancer Research Center (DKFZ), Heidelberg, DE', + email: 'sameesh.kher@dkfz-heidelberg.de', + github: '@khersameesh24', + contribution: ['author', 'maintainer'], + orcid: '0009-0008-2420-6464' ], [ - name: ' Florian Heyl', - affiliation: '', - email: '', - github: '', - contribution: [], // List of contribution types ('author', 'maintainer' or 'contributor') + name: 'Florian Heyl', + affiliation: 'German Cancer Research Center (DKFZ), Heidelberg, DE', + email: 'florian.heyl@dkfz-heidelberg.de', + github: '@heylf', + contribution: ['author', 'maintainer'], orcid: '' ], + [ + name: 'Dongze He', + affiliation: 'Altos Labs, San Diego, USA', + email: 'dongzehe.zaza@gmail.com', + github: '@dongzehe', + contribution: ['contributor'], + orcid: '0000-0001-8259-7434' + ] ] - homePage = 'https://github.com/nf-core/spatialxe' - description = """A pipeline for spatialomics Xenium In Situ data.""" + homePage = 'https://github.com/nf-core/spatialaxe' + description = """A pipeline for spatialomics 10x Xenium In Situ data.""" mainScript = 'main.nf' defaultBranch = 'master' - nextflowVersion = '!>=25.10.4' - version = '1.0.0' + nextflowVersion = '!>=25.04.0' + version = '1.1.0dev' doi = '' } @@ -276,5 +398,6 @@ validation { defaultIgnoreParams = ["genomes"] monochromeLogs = params.monochrome_logs } + // Load modules.config for DSL2 module specific options includeConfig 'conf/modules.config' diff --git a/nextflow_schema.json b/nextflow_schema.json index 5d9dff39..58a47276 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1,8 +1,8 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://raw.githubusercontent.com/nf-core/spatialxe/master/nextflow_schema.json", - "title": "nf-core/spatialxe pipeline parameters", - 
"description": "A pipeline for spatialomics Xenium In Situ data.", + "$id": "https://raw.githubusercontent.com/nf-core/spatialaxe/master/nextflow_schema.json", + "title": "nf-core/spatialaxe pipeline parameters", + "description": "A pipeline to process spatialomics data from 10x Xenium In Situ or 10x Atera.", "type": "object", "$defs": { "input_output_options": { @@ -10,7 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], + "required": ["input", "outdir", "mode"], "properties": { "input": { "type": "string", @@ -19,8 +19,8 @@ "schema": "assets/schema_input.json", "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", - "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/spatialxe/usage#samplesheet-input).", + "description": "Path to comma-separated file containing information about the Xenium experiment. (eg; meta,path-to-xenium-bundle,path-to-morphology.ome.tif))", + "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row.", "fa_icon": "fas fa-file-csv" }, "outdir": { @@ -29,6 +29,79 @@ "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", "fa_icon": "fas fa-folder-open" }, + "mode": { + "type": "string", + "description": "Mode in which the pipeline is to be run. 
Either image-based segmentation, coordinate-based segmentation, segmentation-free analysis or data preview.", + "enum": ["image", "coordinate", "segfree", "preview", "qc"] + }, + "method": { + "type": "string", + "enum": ["cellpose", "xeniumranger", "baysor", "proseg", "segger", "ficture", "stardist"], + "description": "Segmentation method to run." + }, + "gene_panel": { + "type": "string", + "format": "file-path", + "description": "Path to gene panel JSON file to use for relabeling transcripts with the correct gene." + }, + "qupath_polygons": { + "type": "string", + "description": "Path to qupath segmentation file in GeoJSON format.", + "format": "file-path" + }, + "alignment_csv": { + "type": "string", + "description": "Image alignment file containing similarity transform matrix. (e.g., the _imagealignment.csv file exported from Xenium Explorer)", + "format": "file-path" + }, + "cellpose_model": { + "type": "string", + "description": "Model to use for running or starting training.", + "format": "file-path" + }, + "stardist_model": { + "type": "string", + "default": "2D_versatile_fluo", + "description": "StarDist pretrained model for cell segmentation (e.g., '2D_versatile_fluo', '2D_versatile_he')." + }, + "stardist_nuclei_model": { + "type": "string", + "default": "2D_versatile_fluo", + "description": "StarDist pretrained model for nuclei segmentation." + }, + "stardist_prob_thresh": { + "type": "number", + "description": "StarDist object probability threshold. Lower values detect more objects." + }, + "stardist_nms_thresh": { + "type": "number", + "description": "StarDist non-maximum suppression threshold. Lower values reduce overlapping detections." + }, + "stardist_n_tiles": { + "type": "string", + "default": "8 8", + "description": "StarDist tiling for large images (e.g., '4 4'). Reduces memory usage." 
+ }, + "segmentation_mask": { + "type": "string", + "description": "Prior segmentation mask from other segmentation methods.", + "format": "file-path" + }, + "probes_fasta": { + "type": "string", + "description": "Fasta file for the probe sequences used in the xenium experiment.", + "format": "file-path" + }, + "reference_annotations": { + "type": "string", + "description": "Path to the directory containing genomic features (.gff) and fasta (.fa) files used as reference annotations.", + "format": "file-path" + }, + "gene_synonyms": { + "type": "string", + "description": "Gene synonyms that may have been counted as off-targets but simply differ in name.", + "format": "file-path" + }, "email": { "type": "string", "description": "Email address for completion summary.", @@ -43,41 +116,309 @@ } } }, - "reference_genome_options": { - "title": "Reference genome options", + "segmentation_options": { + "title": "Segmentation options", "type": "object", - "fa_icon": "fas fa-dna", - "description": "Reference genome related files and options required for the workflow.", + "description": "Options for the segmentation layer of the spatialaxe pipeline", + "default": "", "properties": { - "genome": { + "run_qc": { + "type": "boolean", + "description": "Whether to run the qc layer in the pipeline.", + "default": true + }, + "offtarget_probe_tracking": { + "type": "boolean", + "description": "Whether to run the off-target probe tracking.", + "default": false + }, + "segmentation_refinement": { + "type": "boolean", + "description": "Whether to run refinement on the image-based segmentation methods. Runs coordinate-based methods after the initial image-based segmentation run." + }, + "relabel_genes": { + "type": "boolean", + "description": "Whether to relabel genes with gene_panel.json file. True when gene_panel is provided." + }, + "xeniumranger_only": { + "type": "boolean", + "description": "Whether to run vanilla xeniumranger workflow." 
+ }, + "cell_segmentation_only": { + "type": "boolean", + "description": "Whether to only run cell segmentation." + }, + "nucleus_segmentation_only": { + "type": "boolean", + "description": "Whether to only run nucleus segmentation." + }, + "expansion_distance": { + "type": "integer", + "default": 5, + "description": "Nuclei boundary expansion distance in µm. Default: 5 (Min: 0, Max: 15 if either boundary-stain or interior-stain are enabled and 100 if nucleus-expansion only)" + }, + "dapi_filter": { + "type": "integer", + "default": 100, + "description": "Minimum intensity in photoelectrons (pe) to filter nuclei. Default: 100. (appropriate range of values is 0 to 99th percentile of image stack or 1000, whichever is larger)" + }, + "interior_stain": { + "type": "boolean", + "default": true, + "description": "Specify the name of the interior stain to use or disable. Supported for cell segmentation staining workflow output bundles. Possible options are: \\\"18S\\\" (default) or \\\"disable\\\"" + }, + "boundary_stain": { + "type": "boolean", + "default": true, + "description": "Specify the name of the boundary stain to use or disable. Supported for cell segmentation staining workflow output bundles. Possible options are: \\\"ATP1A1/CD45/E-Cadherin\\\" (default) or \\\"disable\\\"" + }, + "use_gpu": { + "type": "boolean", + "default": false, + "description": "Enable GPU acceleration (set automatically by the gpu profile)." + }, + "gpu_queue": { "type": "string", - "description": "Name of iGenomes reference.", - "fa_icon": "fas fa-book", - "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." + "description": "AWS Batch queue for GPU tasks (e.g., Segger, ProSeg)."
}, - "fasta": { + "cellpose_queue": { + "type": "string", + "description": "AWS Batch queue for Cellpose (single large GPU)." + }, + "cellpose_downscale": { + "type": "boolean", + "default": false, + "description": "Pre-downscale morphology image to avoid Cellpose OOM on large images." + }, + "sharpen_tiff": { + "type": "boolean", + "description": "Whether to enhance the morphology.ome.tif file." + }, + "segger_accelerator": { + "type": "string", + "default": "cpu", + "enum": ["cpu", "cuda"], + "description": "Device used for training. (e.g., cuda for GPU or cpu)" + }, + "segger_knn_method": { + "type": "string", + "default": "kd_tree", + "enum": ["kd_tree", "cuda"], + "description": "Method for KNN computation. (e.g., cuda for GPU-based computation)" + }, + "segger_num_workers": { + "type": "integer", + "default": 4, + "description": "Number of data-loader workers for Segger." + }, + "segger_model": { "type": "string", "format": "file-path", - "exists": true, - "mimetype": "text/plain", - "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", - "description": "Path to FASTA genome file.", - "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", - "fa_icon": "far fa-file-code" + "description": "Path to a pre-trained Segger model checkpoint." + }, + "format": { + "type": "string", + "default": "xenium", + "enum": ["xenium", "cosmx", "merscope"], + "description": "Input data platform. Used by proseg, segger, and spatialdata modules." + }, + "image_seg_methods": { + "type": "array", + "items": { + "type": "string", + "enum": ["cellpose", "xeniumranger", "baysor", "stardist"] + }, + "description": "List of image-based segmentation methods." 
}, - "igenomes_ignore": { + "transcript_seg_methods": { + "type": "array", + "items": { + "type": "string", + "enum": ["proseg", "segger", "baysor"] + }, + "description": "List of transcript-based segmentation methods." + }, + "segfree_methods": { + "type": "array", + "items": { + "type": "string", + "enum": ["ficture", "baysor"] + }, + "description": "List of segmentation-free methods." + }, + "negative_control_regex": { + "type": "string", + "description": "Regex used to identify or match negative control samples in a dataset." + }, + "features": { + "type": "string", + "description": "List of features to be passed to the ficture method. (eg: TP53,OCIAD1,BCAS3,SOX)" + }, + "filter_transcripts": { "type": "boolean", - "description": "Do not load the iGenomes reference config.", - "fa_icon": "fas fa-ban", - "hidden": true, - "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." + "description": "Whether to filter the transcripts.parquet file before running Baysor segmentation.", + "default": false }, - "igenomes_base": { + "baysor_scale": { + "type": "integer", + "default": 30, + "description": "Baysor --scale parameter for non-tiled runs." + }, + "baysor_config": { "type": "string", - "description": "The base path to the igenomes reference files", - "fa_icon": "fas fa-ban", - "hidden": true, - "default": "s3://ngi-igenomes/igenomes/" + "format": "file-path", + "description": "Path to Baysor config TOML file (optional)." + }, + "baysor_tiling": { + "type": "boolean", + "default": true, + "description": "Enable tiled Baysor segmentation (divide transcripts into patches, run Baysor per patch, stitch results)." + }, + "baysor_tiling_micron": { + "type": "integer", + "default": 1200, + "description": "Tile width in microns for Baysor tiling." 
+ }, + "baysor_tiling_overlap": { + "type": "integer", + "default": 200, + "description": "Overlap between Baysor patches in microns." + }, + "baysor_tiling_balanced": { + "type": "boolean", + "default": true, + "description": "Balance transcripts across tiles by merging sparse tiles." + }, + "baysor_tiling_scale": { + "type": "integer", + "default": 39, + "description": "Baysor --scale for tiled runs (larger to compensate for EM on smaller tiles)." + }, + "baysor_tiling_min_mols_per_cell": { + "type": "integer", + "default": 120, + "description": "Minimum molecules per cell (--min-molecules-per-cell) for tiled Baysor." + }, + "baysor_tiling_min_transcripts_per_cell": { + "type": "integer", + "default": 50, + "description": "Post-stitch cell filtering threshold: minimum transcripts per cell." + }, + "baysor_prior": { + "type": "string", + "enum": ["cells", "cellpose"], + "description": "Prior segmentation type for Baysor. 'cells' uses Xenium bundle cell_id column; 'cellpose' uses Cellpose mask as image prior." + }, + "baysor_prior_confidence": { + "type": "number", + "default": 0.2, + "description": "Baysor prior-segmentation-confidence (0-1)." + }, + "min_qv": { + "type": "number", + "default": 20, + "description": "Minimum Q-Score to pass filtering." 
+ }, + "min_x": { + "type": "number", + "description": "only keep transcripts whose x-coordinate is greater than specified limit, if no limit is specified, the default minimum value will be 0.0" + }, + "max_x": { + "type": "number", + "description": "only keep transcripts whose x-coordinate is less than specified limit, if no limit is specified, the default value will retain all transcripts since Xenium slide is <24000 microns in x and y (default: 24000.0)" + }, + "min_y": { + "type": "number", + "description": "only keep transcripts whose y-coordinate is greater than specified limit, if no limit is specified, the default minimum value will be 0.0" + }, + "max_y": { + "type": "number", + "description": "only keep transcripts whose y-coordinate is less than specified limit, if no limit is specified, the default value will retain all transcripts since Xenium slide is <24000 microns in x and y (default: 24000.0)" + }, + "tiling": { + "type": "boolean", + "description": "Enable tiled segmentation for large datasets. Divides transcripts into overlapping patches, runs segmentation in parallel per patch, then stitches results.", + "default": false + }, + "patch_grid": { + "type": "string", + "description": "Grid layout for tiling (rows x cols), e.g. '3x3', '4x4'.", + "default": "3x3" + }, + "patch_overlap": { + "type": "integer", + "description": "Overlap between adjacent patches in microns.", + "default": 50 + }, + "patch_filter_method": { + "type": "string", + "description": "Post-stitch cell size filtering method. 
Options: 'empirical' (IQR-based), 'distribution' (z-score), 'both', or null to disable.", + "enum": ["empirical", "distribution", "both"] + }, + "patch_filter_iqr_multiplier": { + "type": "number", + "description": "IQR multiplier for empirical cell size filtering during stitching.", + "default": 3.0 + }, + "patch_filter_z_threshold": { + "type": "number", + "description": "Z-score threshold for distribution-based cell size filtering during stitching.", + "default": 4.0 + }, + "csplit_x_bins": { + "type": "integer", + "default": 2, + "description": "Number of tiles along the x axis for cell-type separability." + }, + "csplit_y_bins": { + "type": "integer", + "default": 2, + "description": "Number of tiles along the y axis for cell-type separability." + }, + "tile_width": { + "type": "integer", + "description": "Width of the tiles in pixels", + "default": 120 + }, + "tile_height": { + "type": "integer", + "description": "Height of the tiles in pixels", + "default": 120 + }, + "batch_size_train": { + "type": "integer", + "description": "Number of samples to process per training batch", + "default": 4 + }, + "devices": { + "type": "integer", + "description": "Number of devices (GPUs) to use during training", + "default": 4 + }, + "max_epochs": { + "type": "integer", + "description": "Number of training epochs", + "default": 200 + }, + "batch_size_predict": { + "type": "integer", + "description": "Number of samples to process per batch during prediction", + "default": 1 + }, + "cc_analysis": { + "type": "boolean", + "description": "Whether to use connected components for grouping transcripts without direct nucleus association", + "default": false + }, + "buffer_samples": { + "type": "boolean", + "description": "Process only one sample at a time from a multi-sample samplesheet.", + "default": false + }, + "buffer_size": { + "type": "integer", + "description": "Number of sample(s) to process at a time from a multi-sample samplesheet. 
Only takes effect when buffer_samples is true.", + "default": 1 } } }, @@ -126,6 +467,11 @@ "description": "Institutional config URL link.", "hidden": true, "fa_icon": "fas fa-users-cog" + }, + "test_data_base": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/spatialaxe", + "description": "Base path / URL for data used in the test profiles." } } }, @@ -136,6 +482,12 @@ "description": "Less common options for the pipeline, typically set in a config file.", "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, "version": { "type": "boolean", "description": "Display version and exit.", @@ -179,6 +531,13 @@ "fa_icon": "fas fa-palette", "hidden": true }, + "hook_url": { + "type": "string", + "description": "Incoming hook URL for messaging service", + "fa_icon": "fas fa-people-group", + "help_text": "Incoming hook URL for messaging service. 
Currently, MS Teams and Slack are supported.", + "hidden": true + }, "multiqc_config": { "type": "string", "format": "file-path", @@ -204,23 +563,23 @@ "fa_icon": "fas fa-check-square", "hidden": true }, + "monochromeLogs": { + "type": "boolean", + "description": "Do not use coloured log outputs", + "hidden": true + }, "pipelines_testdata_base_path": { "type": "string", - "fa_icon": "far fa-check-circle", - "description": "Base URL or local path to location of pipeline test dataset files", "default": "https://raw.githubusercontent.com/nf-core/test-datasets/", + "description": "Base URL or local path to location of pipeline test dataset files", "hidden": true }, "trace_report_suffix": { "type": "string", "fa_icon": "far calendar", - "description": "Suffix to add to the trace report filename. Default is the date and time in the format yyyy-MM-dd_HH-mm-ss.", + "description": "Suffix to add to the trace report filename. Default is the date and time in the format yyyy-MM-dd_HH-mm-ss", "hidden": true }, - "help": { - "type": ["boolean", "string"], - "description": "Display the help message." - }, "help_full": { "type": "boolean", "description": "Display the full detailed help message." @@ -237,7 +596,7 @@ "$ref": "#/$defs/input_output_options" }, { - "$ref": "#/$defs/reference_genome_options" + "$ref": "#/$defs/segmentation_options" }, { "$ref": "#/$defs/institutional_config_options" diff --git a/nf-test.config b/nf-test.config index f7aaeb4a..3a1fff59 100644 --- a/nf-test.config +++ b/nf-test.config @@ -1,35 +1,21 @@ config { // location for all nf-test tests - testsDir = "." + testsDir "." 
// nf-test directory including temporary files for each test - workDir = System.getenv("NFT_WORKDIR") ?: ".nf-test" + workDir System.getenv("NFT_WORKDIR") ?: ".nf-test" // location of an optional nextflow.config file specific for executing tests - configFile = "tests/nextflow.config" + configFile "tests/nextflow.config" // ignore tests coming from the nf-core/modules repo - ignore = [ - 'modules/nf-core/**/tests/*', - 'subworkflows/nf-core/**/tests/*', - ] + ignore 'modules/nf-core/**/tests/*', 'subworkflows/nf-core/**/tests/*' // run all test with defined profile(s) from the main nextflow.config - profile = "test" + profile "test" // list of filenames or patterns that should be trigger a full test run - triggers = [ - '.github/actions/nf-test/action.yml', - '.github/workflows/nf-test.yml', - 'assets/schema_input.json', - 'bin/*', - 'conf/test.config', - 'nextflow.config', - 'nextflow_schema.json', - 'nf-test.config', - 'tests/.nftignore', - 'tests/nextflow.config', - ] + triggers 'nextflow.config', 'nf-test.config', 'conf/test.config', 'tests/nextflow.config', 'tests/.nftignore' // load the necessary plugins plugins { diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 2d0ac5c2..e09dc9f3 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -21,9 +21,9 @@ { "@id": "./", "@type": "Dataset", - "creativeWorkStatus": "Stable", - "datePublished": "2026-04-30T13:33:23+00:00", - "description": "

\n \n \n \"nf-core/spatialxe\"\n \n

\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/spatialxe)\n[![GitHub Actions CI Status](https://github.com/nf-core/spatialxe/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/spatialxe/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/spatialxe/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/spatialxe/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/spatialxe/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.4-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-4.0.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/4.0.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/spatialxe)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23spatialxe-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/spatialxe)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/spatialxe** is a bioinformatics pipeline that ...\n\n\n\n\n1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/get_started/environment_setup/overview) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/get_started/run-your-first-pipeline) with `-profile test` before running the workflow on actual data.\n\n\n\nNow, you can run the pipeline using:\n\n\n\n```bash\nnextflow run nf-core/spatialxe \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/running/run-pipelines#using-parameter-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/spatialxe/usage) and the [parameter documentation](https://nf-co.re/spatialxe/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/spatialxe/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/spatialxe/output).\n\n## Credits\n\nnf-core/spatialxe was originally written by Sameesh Kher, Florian Heyl.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](docs/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#spatialxe` channel](https://nfcore.slack.com/channels/spatialxe) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "creativeWorkStatus": "InProgress", + "datePublished": "2026-06-17T15:15:09+00:00", + "description": "

\n \n \n \"nf-core/spatialaxe\"\n \n

\n\n[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://github.com/codespaces/new/nf-core/spatialaxe)\n[![GitHub Actions CI Status](https://github.com/nf-core/spatialaxe/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/spatialaxe/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/spatialaxe/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/spatialaxe/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/spatialaxe/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.4.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.4.1)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/spatialaxe)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23spatialaxe-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/spatialaxe)[![Follow on 
Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/spatialaxe** is a bioinformatics best-practice processing and quality control pipeline for Xenium (and soon Atera) data. The current plan for the pipeline implementation is shown in the metromap below. **The pipeline is under active development and changes might occur frequently**.\n\n![nf-core/spatialaxe-metromap](docs/images/spatialaxe-metromap.png)\n\n> [!NOTE]\n> We are currently extending the pipeline for the [10x Atera system](https://www.10xgenomics.com/platforms/atera).\n\n## Tools supported\n\nThe pipeline supports the following tools:\n\n- Segmentation methods:\n - [Baysor](https://doi.org/10.1038/s41587-021-01044-w)\n - [Cellpose](https://doi.org/10.1038/s41592-020-01018-x)\n - [Xenium ranger (XR)](https://www.10xgenomics.com/support/software/xenium-ranger/latest)\n - [StarDist](https://doi.org/10.48550/arXiv.2203.02284)\n- Segmentation free methods:\n - [Ficture](https://doi.org/10.1038/s41592-024-02415-2)\n - [Baysor](https://doi.org/10.1038/s41587-021-01044-w)\n- Transcript assignment methods:\n - [Segger](https://doi.org/10.1101/2025.03.14.643160)\n - [Proseg](https://doi.org/10.1038/s41592-025-02697-0)\n- Utility methods:\n - [SpatialData](https://doi.org/10.1038/s41592-024-02212-x)\n - [Baysor](https://doi.org/10.1038/s41587-021-01044-w)\n- QC methods:\n - [MultiQC Xenium Extra Plugin](https://github.com/MultiQC/xenium-extra)\n - [OPT](https://github.com/JEFworks-Lab/off-target-probe-tracker)\n\n## Usage\n\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS 
cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/spatialaxe/results).\n\n> [!NOTE]\n> The pipeline does not support conda currently. We are working on it.\n\n## Quick Start\n\n`samplesheet.csv`:\n\n```csv\nsample,bundle,image\ntest_sample,/path/to/xenium-bundle,/path/to/morphology.ome.tif\n```\n\nNow, you can run the pipeline using:\n\n### Run image-based segmentation mode
\n\n`CELLPOSE -> BAYSOR -> XR-IMPORT_SEGMENTATION -> SPATIALDATA -> QC`\n\n```bash\nnextflow run nf-core/spatialaxe \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \\\n --mode \n```\n\n### Run coordinate-based segmentation mode
\n\n`PROSEG -> PROSEG2BAYSOR -> XR-IMPORT_SEGMENTATION -> SPATIALDATA -> QC`\n\n```bash\nnextflow run nf-core/spatialaxe \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \\\n --mode coordinate\n```\n\n### Run segfree mode
\n\n`BAYSOR_SEGFREE`\n\n```bash\nnextflow run nf-core/spatialaxe \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \\\n --mode segfree\n```\n\n### Run preview mode
\n\n`BAYSOR_PREVIEW`\n\n```bash\nnextflow run nf-core/spatialaxe \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \\\n --mode preview\n```\n\n### Run just the quality control
\n\n```bash\nnextflow run nf-core/spatialaxe \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \\\n --mode qc\n```\n\n### Additional information\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/spatialaxe/usage) and the [parameter documentation](https://nf-co.re/spatialaxe/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/spatialaxe/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/spatialaxe/output).\n\n## Runtime and resource estimations\n\n| Tool | Compute | Runtime (min / med / max) | Peak RSS (min / med / max) |\n| ------------------------- | ------- | ------------------------- | -------------------------- |\n| Cellpose | GPU | 1m / 4m / 1.4h | 10 GB / 26 GB / 554 GB |\n| Cellpose | CPU | 1.3h / 2.3h / 6.5h | 161 GB / 426 GB / 1115 GB |\n| StarDist | GPU | 1m / 4m / 7m | 5 GB / 12 GB / 18 GB |\n| StarDist | CPU | 5m / 6m / 7m | 18 GB / 18 GB / 18 GB |\n| Segger (create_dataset) | GPU | 2m / 9m / 31m | 1.7 GB / 14 GB / 50 GB |\n| Segger (create_dataset) | CPU | 13m / 21m / 46m | 13 GB / 19 GB / 49 GB |\n| Segger (train) | GPU | 10m / 43m / 2.9h | 30 GB / 33 GB / 60 GB |\n| Segger (predict) | GPU | 2m / 16m / 59m | 10 GB / 25 GB / 87 GB |\n| Baysor (whole-image) | CPU | 2m / 30m / 17h | 6 GB / 10 GB / 650 GB |\n| Baysor (tiled) | CPU | 1m / 18m / 13h | 0.2 GB / 34 GB / 530 GB |\n| Proseg | CPU | 1m / 18m / 6.8h | 279 MB / 3.8 GB / 136 GB |\n| XeniumRanger 
(resegment) | CPU | 18m / 39m / 3.7h | 28 GB / 54 GB / 60 GB |\n| XeniumRanger (import_seg) | CPU | 2m / 7m / 2.7h | 2.6 GB / 11 GB / 51 GB |\n| Ficture (preprocess) | CPU | 3m / 4m / 13m | 331 MB / 357 MB / 21 GB |\n\n- Cellpose GPU vs CPU: 35x faster on GPU (4m median vs 2.3h), 16x less memory (26 GB vs 426 GB)\n- Segger: Only tool that truly requires GPU for all 3 steps (create_dataset, train, predict)\n- StarDist: Very fast on CPU, GPU is not necessary to run its default model\n\n## Credits\n\nnf-core/spatialaxe is mainly developed by [Sameesh Kher](https://github.com/khersameesh24), [Dongze He](https://github.com/dongzehe), and [Florian Heyl](https://github.com/heylf).\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- Tobias Krause\n- Krešimir Beštak (kbestak)\n- Matthias Hörtenhuber (mashehu)\n- Maxime Garcia (maxulysse)\n- Kübra Narcı (kubranarci)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#spatialaxe` channel](https://nfcore.slack.com/channels/spatialaxe) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. 
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" @@ -31,6 +31,9 @@ { "@id": "assets/" }, + { + "@id": "bin/" + }, { "@id": "conf/" }, @@ -43,6 +46,9 @@ { "@id": "modules/" }, + { + "@id": "modules/local/" + }, { "@id": "modules/nf-core/" }, @@ -92,17 +98,17 @@ "@id": ".prettierignore" } ], - "isBasedOn": "https://github.com/nf-core/spatialxe", + "isBasedOn": "https://github.com/nf-core/spatialaxe", "license": "MIT", "mainEntity": { "@id": "main.nf" }, "mentions": [ { - "@id": "#1c52c7cb-def3-43c9-9e10-865c2cf0ba78" + "@id": "#7c4a8872-f8d4-4f4b-a7f2-c2754d484851" } ], - "name": "nf-core/spatialxe" + "name": "nf-core/spatialaxe" }, { "@id": "ro-crate-metadata.json", @@ -121,22 +127,24 @@ }, { "@id": "main.nf", - "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], - "contributor": [ + "@type": [ + "File", + "SoftwareSourceCode", + "ComputationalWorkflow" + ], + "creator": [ { "@id": "https://orcid.org/0009-0008-2420-6464" - }, - { - "@id": "https://orcid.org/0000-0002-3651-5685" } ], "dateCreated": "", - "dateModified": "2026-04-30T13:33:23Z", + "dateModified": "2026-06-17T17:15:09Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", "keywords": [ "nf-core", "nextflow", "10x-genomics", + "atera", "image-processing", "spatial", "spatial-data-analysis", @@ -144,16 +152,30 @@ "transcriptomics", "xenium" ], - "license": ["MIT"], - "name": ["nf-core/spatialxe"], + "license": [ + "MIT" + ], + "maintainer": [ + { + "@id": "https://orcid.org/0009-0008-2420-6464" + } + ], + "name": [ + "nf-core/spatialaxe" + ], "programmingLanguage": { "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow" }, "sdPublisher": { "@id": "https://nf-co.re/" }, - "url": ["https://github.com/nf-core/spatialxe", "https://nf-co.re/spatialxe/1.0.0/"], - "version": ["1.0.0"] + "url": [ + "https://github.com/nf-core/spatialaxe", + "https://nf-co.re/spatialaxe/dev/" + ], + 
"version": [ + "1.1.0dev" + ] }, { "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow", @@ -165,26 +187,26 @@ "url": { "@id": "https://www.nextflow.io/" }, - "version": "!>=25.10.4" + "version": "!>=25.04.0" }, { - "@id": "#1c52c7cb-def3-43c9-9e10-865c2cf0ba78", + "@id": "#7c4a8872-f8d4-4f4b-a7f2-c2754d484851", "@type": "TestSuite", "instance": [ { - "@id": "#162edd3a-9c52-463a-94f5-78ed109f8513" + "@id": "#3ab54b2a-9518-45a0-8008-3839d03402c0" } ], "mainEntity": { "@id": "main.nf" }, - "name": "Test suite for nf-core/spatialxe" + "name": "Test suite for nf-core/spatialaxe" }, { - "@id": "#162edd3a-9c52-463a-94f5-78ed109f8513", + "@id": "#3ab54b2a-9518-45a0-8008-3839d03402c0", "@type": "TestInstance", - "name": "GitHub Actions workflow for testing nf-core/spatialxe", - "resource": "repos/nf-core/spatialxe/actions/workflows/nf-test.yml", + "name": "GitHub Actions workflow for testing nf-core/spatialaxe", + "resource": "repos/nf-core/spatialaxe/actions/workflows/nf-test.yml", "runsOn": { "@id": "https://w3id.org/ro/terms/test#GithubService" }, @@ -203,6 +225,11 @@ "@type": "Dataset", "description": "Additional files" }, + { + "@id": "bin/", + "@type": "Dataset", + "description": "Scripts that must be callable from a pipeline process" + }, { "@id": "conf/", "@type": "Dataset", @@ -223,6 +250,11 @@ "@type": "Dataset", "description": "Modules used by the pipeline" }, + { + "@id": "modules/local/", + "@type": "Dataset", + "description": "Pipeline-specific modules" + }, { "@id": "modules/nf-core/", "@type": "Dataset", @@ -312,12 +344,8 @@ { "@id": "https://orcid.org/0009-0008-2420-6464", "@type": "Person", + "email": "khersameesh24@gmail.com", "name": "Sameesh Kher" - }, - { - "@id": "https://orcid.org/0000-0002-3651-5685", - "@type": "Person", - "name": "Florian Heyl" } ] -} +} \ No newline at end of file diff --git a/subworkflows/local/baysor_generate_preview/main.nf b/subworkflows/local/baysor_generate_preview/main.nf new file mode 100644 index 
00000000..2494fcbd --- /dev/null +++ b/subworkflows/local/baysor_generate_preview/main.nf @@ -0,0 +1,49 @@ +// +// Run baysor create_dataset & preview +// + +include { BAYSOR_PREVIEW } from '../../../modules/local/baysor/preview/main' +include { BAYSOR_CREATE_DATASET } from '../../../modules/local/baysor/create_dataset/main' +include { EXTRACT_PREVIEW_DATA } from '../../../modules/local/utility/extract_preview_data/main' +include { PARQUET_TO_CSV } from '../../../modules/local/utility/parquet_to_csv/main' + +workflow BAYSOR_GENERATE_PREVIEW { + take: + ch_transcripts_file // channel: [ val(meta), ["path-to-transcripts.parquet"] ] + ch_config // channel: ["path-to-xenium.toml"] + + main: + + ch_preview_mqc_html = channel.empty() + ch_preview_mqc_png = channel.empty() + + + // run parquet to csv + PARQUET_TO_CSV(ch_transcripts_file, ".csv") + + // generate randomised sample data + BAYSOR_CREATE_DATASET(PARQUET_TO_CSV.out.transcripts_csv, 0.3) + + // run baysor preview if param - generate_preview is true + ch_sampled_transcripts = BAYSOR_CREATE_DATASET.out.sampled_transcripts + ch_baysor_preview_input = ch_sampled_transcripts + .combine(ch_config) + .map { meta, transcripts, config -> + tuple( + meta, + transcripts, + config + ) + } + BAYSOR_PREVIEW(ch_baysor_preview_input) + + // clean the preview html file generated + EXTRACT_PREVIEW_DATA(BAYSOR_PREVIEW.out.preview_html) + + ch_preview_mqc_html = EXTRACT_PREVIEW_DATA.out.mqc_data + ch_preview_mqc_png = EXTRACT_PREVIEW_DATA.out.mqc_img + + emit: + preview_html = ch_preview_mqc_html // channel: [ val(meta), ["*_mqc.tsv"] ] + preview_img = ch_preview_mqc_png // channel: [ val(meta), ["*_mqc.png"] ] +} diff --git a/subworkflows/local/baysor_generate_preview/meta.yml b/subworkflows/local/baysor_generate_preview/meta.yml new file mode 100644 index 00000000..5807c233 --- /dev/null +++ b/subworkflows/local/baysor_generate_preview/meta.yml @@ -0,0 +1,34 @@ +# yaml-language-server: 
$schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "baysor_generate_preview" +description: quick preview to get meaning from the data and to get some guesses about the parameters of the full baysor run +keywords: + - baysor + - preview + - transcripts + - report + - preview_html + - html_report + - visualization +components: + - baysor/preview + - baysor/create/dataset + - parquet/to/csv + - extract/preview/data +input: + - ch_transcripts_parquet: + description: | + input parquet file from the xenium bundle + Structure: [ val(meta), path("path-to-transcripts.parquet") ] + - ch_config: + description: | + config file for the xenium baysor run (stored in assets/config/xenium.toml) + Structure: [ path("path-to-xenium.toml") ] +output: + - preview_html: + description: | + Preview html file generated with the baysor preview command + Structure: [ val(meta), path("path-to-preview.html") ] +authors: + - "@khersameesh24" +maintainers: + - "@khersameesh24" diff --git a/subworkflows/local/baysor_generate_segfree/main.nf b/subworkflows/local/baysor_generate_segfree/main.nf new file mode 100644 index 00000000..74a160f0 --- /dev/null +++ b/subworkflows/local/baysor_generate_segfree/main.nf @@ -0,0 +1,52 @@ +// +// Run baysor segfree +// + +include { BAYSOR_PREPROCESS_TRANSCRIPTS } from '../../../modules/local/baysor/preprocess/main' +include { BAYSOR_SEGFREE } from '../../../modules/local/baysor/segfree/main' +// include a module to process the output loom file with scanpy or anndata + +workflow BAYSOR_GENERATE_SEGFREE { + take: + ch_transcripts_file // channel: [ val(meta), ["transcripts.parquet"] ] + ch_config // channel: [ ["path-to-xenium.toml"] ] + max_x // value: spatial filter upper x bound + max_y // value: spatial filter upper y bound + min_qv // value: minimum transcript QV + min_x // value: spatial filter lower x bound + min_y // value: spatial filter lower y bound + + main: + + ch_transcripts = channel.empty() + + // 
Always preprocess transcripts.parquet to CSV for Baysor 0.7.1 compatibility. + // Baysor's Julia Parquet.jl cannot read zstd-compressed parquet files from Xenium bundles. + // Also applies optional spatial/QV filtering when filter_transcripts is true. + BAYSOR_PREPROCESS_TRANSCRIPTS( + ch_transcripts_file, + min_qv, + max_x, + min_x, + max_y, + min_y, + ) + ch_transcripts = BAYSOR_PREPROCESS_TRANSCRIPTS.out.transcripts_file + + // run baysor segfree + ch_baysor_segfree_input = ch_transcripts + .combine(ch_config) + .map { meta, transcripts, config -> + tuple( + meta, + transcripts, + config + ) + } + BAYSOR_SEGFREE( + ch_baysor_segfree_input + ) + + emit: + ncvs = BAYSOR_SEGFREE.out.ncvs // channel: [ val(meta), ["ncvs.loom"] ] +} diff --git a/subworkflows/local/baysor_generate_segfree/meta.yml b/subworkflows/local/baysor_generate_segfree/meta.yml new file mode 100644 index 00000000..b312ff00 --- /dev/null +++ b/subworkflows/local/baysor_generate_segfree/meta.yml @@ -0,0 +1,28 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "baysor_generate_segfree" +description: with segfree analyses that don't require segmentation, can be run on local neighborhoods instead (Neighborhood Composition Vectors (NCVs)) +keywords: + - baysor + - segfree + - neighborhoods + - loom +components: + - baysor/segfree +input: + - ch_transcripts_parquet: + description: | + input parquet file from the xenium bundle + Structure: [ val(meta), path("path-to-transcripts.parquet") ] + - ch_config: + description: | + config file for the xenium baysor run (stored in assets/config/xenium.toml) + Structure: [ path("path-to-xenium.toml") ] +output: + - ncvs: + description: | + loom file generated with the baysor segfree command + Structure: [ val(meta), path("path-to-ncvs.loom") ] +authors: + - "@khersameesh24" +maintainers: + - "@khersameesh24" diff --git a/subworkflows/local/baysor_run_prior_segmentation_mask/main.nf 
b/subworkflows/local/baysor_run_prior_segmentation_mask/main.nf new file mode 100644 index 00000000..d5acc0a1 --- /dev/null +++ b/subworkflows/local/baysor_run_prior_segmentation_mask/main.nf @@ -0,0 +1,83 @@ +// +// Run baysor run & import-segmentation +// + +include { BAYSOR_PREPROCESS_TRANSCRIPTS } from '../../../modules/local/baysor/preprocess/main' +include { BAYSOR_RUN } from '../../../modules/local/baysor/run/main' +include { XENIUMRANGER_IMPORT_SEGMENTATION } from '../../../modules/nf-core/xeniumranger/import-segmentation/main' + + +workflow BAYSOR_RUN_PRIOR_SEGMENTATION_MASK { + take: + ch_bundle_path // channel: [ val(meta), ["path-to-xenium-bundle"] ] + ch_transcripts_file // channel: [ val(meta), ["path-to-transcripts.parquet"] ] + ch_segmentation_mask // channel: [ ["path-to-prior-segmentation-mask"] ] + ch_config // channel: [ "path-to-xenium.toml" ] + max_x // value: spatial filter upper x bound + max_y // value: spatial filter upper y bound + min_qv // value: minimum transcript QV + min_x // value: spatial filter lower x bound + min_y // value: spatial filter lower y bound + + main: + + ch_transcripts = channel.empty() + + ch_redefined_bundle = channel.empty() + ch_coordinate_space = channel.value("pixels") + + // Always preprocess transcripts.parquet to CSV for Baysor 0.7.1 compatibility. + // Baysor's Julia Parquet.jl cannot read zstd-compressed parquet files from Xenium bundles. + // Also applies optional spatial/QV filtering when filter_transcripts is true. 
+ BAYSOR_PREPROCESS_TRANSCRIPTS( + ch_transcripts_file, + min_qv, + max_x, + min_x, + max_y, + min_y, + ) + ch_transcripts = BAYSOR_PREPROCESS_TRANSCRIPTS.out.transcripts_file + + + // run baysor with prior segmentation mask + ch_baysor_input = ch_transcripts + .combine(ch_segmentation_mask) + .combine(ch_config) + .map { meta, transcripts, mask, config -> + tuple( + meta, + transcripts, + mask, + config, + 30, + ) + } + BAYSOR_RUN(ch_baysor_input) + + + // run import-segmentation with baysor outs + ch_imp_seg_inputs = ch_bundle_path + .combine(BAYSOR_RUN.out.segmentation, by: 0) + .map { meta, bundle, _segmentation_csv, polygons2d -> + tuple( + meta, + bundle, + [], + [], + polygons2d, + polygons2d, + [], + ch_coordinate_space.val, + ) + } + XENIUMRANGER_IMPORT_SEGMENTATION( + ch_imp_seg_inputs + ) + + ch_redefined_bundle = XENIUMRANGER_IMPORT_SEGMENTATION.out.outs + + emit: + coordinate_space = ch_coordinate_space // channel: [ "pixels" ] + redefined_bundle = ch_redefined_bundle // channel: [ val(meta), ["redefined-xenium-bundle"] ] +} diff --git a/subworkflows/local/baysor_run_prior_segmentation_mask/meta.yml b/subworkflows/local/baysor_run_prior_segmentation_mask/meta.yml new file mode 100644 index 00000000..f01f44c7 --- /dev/null +++ b/subworkflows/local/baysor_run_prior_segmentation_mask/meta.yml @@ -0,0 +1,58 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "baysor_run_prior_segmentation_mask" +description: to run the `baysor run` command if a prior segmentation mask is available +keywords: + - baysor + - baysor run + - segmentation + - segmentation mask + - xeniumranger import-segmentation + - image-based segmentation + - transcript filtering + - polygons +components: + - baysor/preprocess/transcripts + - baysor/run + - xeniumranger/import/segmentation +input: + - ch_bundle_path: + description: | + path to the xenium bundle + Structure: [ val(meta), 
path("path-to-xenium-bundle") ] + - ch_transcripts_parquet: + description: | + input parquet file from the xenium bundle + Structure: [ val(meta), path("path-to-transcripts.parquet") ] + - ch_segmentation_mask: + description: | + prior segmentation mask filepath + Structure: [ path("path-to-prior-segmentation-mask.tif") ] + - ch_config: + description: | + config file for the xenium baysor run (stored in assets/config/xenium.toml) + Structure: [ path("path-to-xenium.toml") ] +output: + - segmentation: + description: | + the segmentation.csv file generated from the baysor run command + Structure: [ val(meta), path("segmentation.csv") ] + - polygons2d: + description: | + the segmentation_polygons_2d.json file generated from the baysor run command + Structure: [ val(meta), path("segmentation_polygons_2d.json") ] + - htmls: + description: | + the html files generated from the baysor run command + Structure: [ val(meta), path("*.html") ] + - coordinate_space: + description: | + the coordinate space in which xeniumranger import-segmentation was run + Structure: [ val("microns") ] + - redefined_bundle: + description: | + the redefined xenium bundle generated with the segmentation results from baysor + Structure: [ val(meta), ["redefined-xenium-bundle"] ] +authors: + - "@khersameesh24" +maintainers: + - "@khersameesh24" diff --git a/subworkflows/local/baysor_run_transcripts_parquet/main.nf b/subworkflows/local/baysor_run_transcripts_parquet/main.nf new file mode 100644 index 00000000..aff27051 --- /dev/null +++ b/subworkflows/local/baysor_run_transcripts_parquet/main.nf @@ -0,0 +1,166 @@ +// +// Unified Baysor subworkflow: handles both tiled and non-tiled paths. 
+// +// When baysor_tiling=true: divide → parquet-to-CSV → per-patch Baysor → reconstruct patches → stitch → xeniumranger +// When baysor_tiling=false: preprocess → Baysor → xeniumranger +// +// Prior segmentation support: +// Column-based (cells): works with both tiled and non-tiled +// Image-based (cellpose): non-tiled only (mask passed to Baysor) +// + +include { XENIUM_PATCH_DIVIDE } from '../../../modules/local/xenium_patch/divide/main' +include { PARQUET_TO_CSV } from '../../../modules/local/parquet_to_csv/main' +include { BAYSOR_RUN } from '../../../modules/local/baysor/run/main' +include { BAYSOR_PREPROCESS_TRANSCRIPTS } from '../../../modules/local/baysor/preprocess/main' +include { XENIUM_PATCH_STITCH } from '../../../modules/local/xenium_patch/stitch/main' +include { RECONSTRUCT_PATCHES } from '../../../modules/local/utility/reconstruct_patches/main' +include { XENIUMRANGER_IMPORT_SEGMENTATION } from '../../../modules/nf-core/xeniumranger/import-segmentation/main' + + +workflow BAYSOR_RUN_TRANSCRIPTS_PARQUET { + + take: + ch_bundle_path // channel: [ val(meta), ["path-to-xenium-bundle"] ] + ch_transcripts_file // channel: [ val(meta), ["transcripts.parquet"] ] + ch_morphology_image // channel: [ val(meta), ["morphology_focus.ome.tif"] ] (read by the tiled path only) + ch_config // channel: ["path-to-xenium.toml"] (read by the non-tiled path only) + ch_prior_mask // channel: [ val(meta), ["resized_mask.tif"] ] or empty (cellpose); read by the non-tiled path only + baysor_config // value: path to baysor config TOML (or null); read by the tiled path only + baysor_scale // value: Baysor --scale for non-tiled runs + baysor_tiling // value: bool — enable tiling + baysor_tiling_scale // value: Baysor --scale for tiled runs + max_x // value: spatial filter upper x bound + max_y // value: spatial filter upper y bound + min_qv // value: minimum transcript QV + min_x // value: spatial filter lower x bound + min_y // value: spatial filter lower y bound + + main: + + ch_coordinate_space = channel.value("microns") + + if ( baysor_tiling ) { + + // ── TILED PATH ────────────────────────────────────────────────── + + // Step 1:
Divide transcripts into overlapping patches + ch_divide_input = ch_transcripts_file + .join(ch_morphology_image, by: 0) + + XENIUM_PATCH_DIVIDE ( ch_divide_input ) + + // Step 2: Fan out patches for parallel processing + ch_patches = XENIUM_PATCH_DIVIDE.out.patch_transcripts + .transpose() + .map { meta, parquet_file -> + def patch_id = parquet_file.parent.name + def patch_meta = meta.clone() + patch_meta.sample_id = meta.id + patch_meta.patch_id = patch_id + patch_meta.id = "${meta.id}_${patch_id}" + tuple(patch_meta, parquet_file) + } + + // Step 2b: Convert parquet to CSV (Baysor Julia Parquet.jl incompatibility) + PARQUET_TO_CSV ( ch_patches ) + + // Step 3: Run Baysor on each patch independently + // Use baysor_tiling_scale (larger than baysor_scale) to compensate for EM + // convergence producing smaller cells on tile-sized datasets. + BAYSOR_RUN ( + PARQUET_TO_CSV.out.csv.map { meta, transcripts -> + tuple(meta, transcripts, [], baysor_config ? file(baysor_config) : [], baysor_tiling_scale) + } + ) + + // Step 4: Gather patch results per sample and reconstruct patches directory + ch_baysor_results = BAYSOR_RUN.out.segmentation + .map { patch_meta, csv, polygons -> + tuple(patch_meta.sample_id, [patch_meta.patch_id, csv, polygons]) + } + .groupTuple(by: 0) + .map { sample_id, patch_data -> + def sorted = patch_data.sort { it -> it[0] } + def patch_ids = sorted.collect { it -> it[0] } + def csvs = sorted.collect { it -> it[1] } + def geojsons = sorted.collect { it -> it[2] } + tuple(sample_id, patch_ids, csvs, geojsons) + } + + ch_stitch_input = ch_baysor_results + .join( + XENIUM_PATCH_DIVIDE.out.grid + .map { meta, grid -> tuple(meta.id, meta, grid) } // keep the full sample meta so combine(by: 0) with ch_bundle_path still matches + ) + .map { _sample_id, patch_ids, csvs, geojsons, meta, grid_json -> + tuple(meta, grid_json, patch_ids, csvs, geojsons) + } + + // Step 5: Stitch patch results + RECONSTRUCT_PATCHES ( ch_stitch_input ) + XENIUM_PATCH_STITCH ( RECONSTRUCT_PATCHES.out.patches_dir ) + + // Step 6: xeniumranger
import-segmentation (tiled) + // spatialaxe signature: meta, bundle, transcript_assignment, viz_polygons, nuclei, cells, coordinate_transform, units + ch_xr = ch_bundle_path + .combine(XENIUM_PATCH_STITCH.out.xr_polygons_transcript, by: 0) + .map { + meta, bundle, xr_cell_polygons, xr_transcript_metadata -> tuple( + meta, bundle, + xr_transcript_metadata, + xr_cell_polygons, + [], [], [], + "microns" + ) + } + + XENIUMRANGER_IMPORT_SEGMENTATION (ch_xr) + + } else { + + // ── NON-TILED PATH ────────────────────────────────────────────── + + // Preprocess: parquet → CSV with optional spatial/QV filtering + BAYSOR_PREPROCESS_TRANSCRIPTS( + ch_transcripts_file, + min_qv, + max_x, + min_x, + max_y, + min_y, + ) + + // Run Baysor on full transcripts (with optional image-based prior mask) + ch_csv_with_mask = BAYSOR_PREPROCESS_TRANSCRIPTS.out.transcripts_file + .join(ch_prior_mask, by: 0, remainder: true) + .map { meta, transcripts, mask -> + tuple(meta, transcripts, mask ?: []) + } + ch_baysor_input = ch_csv_with_mask + .combine(ch_config) + .map { meta, transcripts, mask, config -> + tuple(meta, transcripts, mask, config, baysor_scale) + } + BAYSOR_RUN(ch_baysor_input) + + // xeniumranger import-segmentation (non-tiled); units arrive via combine instead of a blocking .val read + // spatialaxe signature: meta, bundle, transcript_assignment, viz_polygons, nuclei, cells, coordinate_transform, units + ch_xr = ch_bundle_path + .combine(BAYSOR_RUN.out.segmentation, by: 0) + .combine(ch_coordinate_space) + .map { meta, bundle, segmentation_csv, polygons2d, coord_space -> + tuple(meta, bundle, + segmentation_csv, + polygons2d, + [], [], [], + coord_space) + } + + XENIUMRANGER_IMPORT_SEGMENTATION(ch_xr) + } + + emit: + redefined_bundle = XENIUMRANGER_IMPORT_SEGMENTATION.out.outs + coordinate_space = ch_coordinate_space +} diff --git a/subworkflows/local/baysor_run_transcripts_parquet/meta.yml b/subworkflows/local/baysor_run_transcripts_parquet/meta.yml new file mode 100644 index 00000000..ccc4a816 --- /dev/null +++
b/subworkflows/local/baysor_run_transcripts_parquet/meta.yml @@ -0,0 +1,54 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "baysor_run_transcripts_parquet" +description: to run the `baysor run` command with the transcripts.parquet file as a coordinate-based segmentation run +keywords: + - baysor + - baysor run + - segmentation + - xeniumranger import-segmentation + - coordinate-based segmentation + - transcript filtering + - polygons +components: + - baysor/preprocess/transcripts + - baysor/run + - xeniumranger/import/segmentation + - split/transcripts +input: + - ch_bundle_path: + description: | + path to the xenium bundle + Structure: [ val(meta), path("path-to-xenium-bundle") ] + - ch_transcripts_parquet: + description: | + input parquet file from the xenium bundle + Structure: [ val(meta), path("path-to-transcripts.parquet") ] + - ch_config: + description: | + config file for the xenium baysor run (stored in assets/config/xenium.toml) + Structure: [ path("path-to-xenium.toml") ] +output: + - segmentation: + description: | + the segmentation.csv file generated from the baysor run command + Structure: [ val(meta), path("segmentation.csv") ] + - polygons2d: + description: | + the segmentation_polygons_2d.json file generated from the baysor run command + Structure: [ val(meta), path("segmentation_polygons_2d.json") ] + - htmls: + description: | + the html files generated from the baysor run command + Structure: [ val(meta), path("*.html") ] + - coordinate_space: + description: | + the coordinate space in which xeniumranger import-segmentation was run + Structure: [ val("microns") ] + - redefined_bundle: + description: | + the redefined xenium bundle generated with the segmentation results from baysor + Structure: [ val(meta), ["redefined-xenium-bundle"] ] +authors: + - "@khersameesh24" +maintainers: + - "@khersameesh24" diff --git 
a/subworkflows/local/baysor_run_transcripts_parquet_tiled/main.nf b/subworkflows/local/baysor_run_transcripts_parquet_tiled/main.nf new file mode 100644 index 00000000..70045331 --- /dev/null +++ b/subworkflows/local/baysor_run_transcripts_parquet_tiled/main.nf @@ -0,0 +1,104 @@ +// +// Runs baysor with tiling: divide transcripts -> preprocess per patch -> baysor per patch -> stitch -> xeniumranger +// + +include { XENIUM_PATCH_DIVIDE } from '../../../modules/local/xenium_patch/divide/main' +include { BAYSOR_PREPROCESS_TRANSCRIPTS } from '../../../modules/local/baysor/preprocess/main' +include { BAYSOR_RUN } from '../../../modules/local/baysor/run/main' +include { XENIUM_PATCH_STITCH } from '../../../modules/local/xenium_patch/stitch/main' +include { XENIUMRANGER_IMPORT_SEGMENTATION } from '../../../modules/nf-core/xeniumranger/import-segmentation/main' + +workflow BAYSOR_RUN_TRANSCRIPTS_PARQUET_TILED { + + take: + ch_bundle_path // channel: [ val(meta), ["xenium-bundle"] ] + ch_transcripts_file // channel: [ val(meta), ["transcripts.parquet"] ] + ch_config // channel: ["path-to-xenium.toml"] + max_x // value: spatial filter upper x bound + max_y // value: spatial filter upper y bound + min_qv // value: minimum transcript QV + min_x // value: spatial filter lower x bound + min_y // value: spatial filter lower y bound + + main: + + ch_coordinate_space = channel.value("microns") + + // Step 1: Divide transcripts into overlapping patches + XENIUM_PATCH_DIVIDE ( ch_transcripts_file ) + + // Step 2: Fan out patches for parallel processing; each patch meta remembers its sample via sample_id + ch_patches = XENIUM_PATCH_DIVIDE.out.patch_transcripts + .transpose() + .map { meta, parquet_file -> + def patch_id = parquet_file.parent.name + def patch_meta = meta.clone() + patch_meta.sample_id = meta.id + patch_meta.patch_id = patch_id + patch_meta.id = "${meta.id}_${patch_id}" + tuple(patch_meta, parquet_file) + } + + // Step 3: Preprocess each patch's parquet to CSV for Baysor 0.7.1 compatibility + // Baysor's Julia Parquet.jl
cannot read zstd-compressed parquet files + BAYSOR_PREPROCESS_TRANSCRIPTS ( + ch_patches, + min_qv, + max_x, + min_x, + max_y, + min_y, + ) + + // Step 4: Run Baysor on each patch independently (no prior mask; 30 sits in the slot sibling workflows fill with the Baysor scale) + ch_baysor_input = BAYSOR_PREPROCESS_TRANSCRIPTS.out.transcripts_file + .combine(ch_config) + .map { meta, transcripts, config -> + tuple(meta, transcripts, [], config, 30) + } + + BAYSOR_RUN ( ch_baysor_input ) + + // Step 5: Gather patch results per sample for stitching + ch_for_stitch = BAYSOR_RUN.out.segmentation + .map { patch_meta, csv, polygons -> + tuple(patch_meta.sample_id, [patch_meta.patch_id, csv, polygons]) + } + .groupTuple(by: 0) + .map { sample_id, patch_data -> + def sorted = patch_data.sort { it -> it[0] } + def patch_ids = sorted.collect { it -> it[0] } + def csvs = sorted.collect { it -> it[1] } + def geojsons = sorted.collect { it -> it[2] } + tuple(sample_id, patch_ids, csvs, geojsons) + } + + // Combine with grid metadata from DIVIDE + ch_stitch_input = ch_for_stitch + .join( + XENIUM_PATCH_DIVIDE.out.grid + .map { meta, grid -> tuple(meta.id, meta, grid) } + ) + .map { _sample_id, patch_ids, csvs, geojsons, meta, grid_json -> + // reuse the full sample meta (not a rebuilt [id: ...]) so combine(by: 0) with ch_bundle_path still matches + tuple(meta, grid_json, patch_ids, csvs, geojsons) + } + + // Step 6: Stitch patch results into unified segmentation output + XENIUM_PATCH_STITCH ( ch_stitch_input ) + + // Step 7: Run xeniumranger import-segmentation + // Note: Cell size filtering is handled inline by STITCH via --filter-method + ch_xr = ch_bundle_path + .combine(XENIUM_PATCH_STITCH.out.xr_polygons_transcript, by: 0) + .combine(ch_coordinate_space) + .map { meta, bundle, geojson, csv, coord_space -> + tuple(meta, bundle, csv, geojson, [], [], [], coord_space) + } + + XENIUMRANGER_IMPORT_SEGMENTATION ( ch_xr ) + + emit: + coordinate_space = ch_coordinate_space // channel: [ "microns" ] + redefined_bundle = XENIUMRANGER_IMPORT_SEGMENTATION.out.outs // channel: [ val(meta), ["redefined-xenium-bundle"] ] +} diff --git
a/subworkflows/local/cellpose_baysor_import_segmentation/main.nf b/subworkflows/local/cellpose_baysor_import_segmentation/main.nf new file mode 100644 index 00000000..38bbcc74 --- /dev/null +++ b/subworkflows/local/cellpose_baysor_import_segmentation/main.nf @@ -0,0 +1,192 @@ +// +// Run the cellpose (cells) or StarDist (nuclei) mask, baysor and import-segmentation flow +// + +include { RESOLIFT } from '../../../modules/local/resolift/main' +include { BAYSOR_RUN } from '../../../modules/local/baysor/run/main' +include { CELLPOSE as CELLPOSE_CELLS } from '../../../modules/nf-core/cellpose/main' +include { EXTRACT_DAPI } from '../../../modules/local/utility/extract_dapi/main' +include { STARDIST as STARDIST_NUCLEI } from '../../../modules/nf-core/stardist/main' +include { CONVERT_MASK_UINT32 } from '../../../modules/local/utility/convert_mask_uint32/main' +include { BAYSOR_PREPROCESS_TRANSCRIPTS } from '../../../modules/local/baysor/preprocess/main' +include { RESIZE_TIF } from '../../../modules/local/utility/resize_tif/main' +include { GET_TRANSCRIPTS_COORDINATES } from '../../../modules/local/utility/get_coordinates/main' +include { XENIUMRANGER_IMPORT_SEGMENTATION } from '../../../modules/nf-core/xeniumranger/import-segmentation/main' + +workflow CELLPOSE_BAYSOR_IMPORT_SEGMENTATION { + take: + ch_morphology_image // channel: [ val(meta), ["path-to-morphology.ome.tif"] ] + ch_bundle_path // channel: [ val(meta), ["path-to-xenium-bundle"] ] + ch_transcripts_file // channel: [ val(meta), ["path-to-transcripts.parquet"] ] + ch_experiment_metadata // channel: [ val(meta), ["path-to-experiment.xenium"] ] + ch_config // channel: ["path-to-xenium.toml"] + cell_segmentation_only // value: bool + cellpose_model // value: path to cellpose model (or null) + max_x // value: spatial filter upper x bound + max_y // value: spatial filter upper y bound + min_qv // value: minimum transcript QV + min_x // value: spatial filter lower x bound + min_y // value: spatial filter lower y bound + nucleus_segmentation_only
// value: bool + sharpen_tiff // value: bool + stardist_nuclei_model // value: stardist pretrained model name + + main: + + ch_transcripts = channel.empty() + ch_imp_seg_inputs = channel.empty() + ch_coordinate_space = channel.value("microns") + + + // Use empty list when no model is provided; path input for official cellpose module + cellpose_model_path = cellpose_model ? file(cellpose_model) : [] + stardist_model = stardist_nuclei_model ?: '2D_versatile_fluo' + + // sharpen morphology tiff if param - sharpen_tiff is true + if (sharpen_tiff) { + + RESOLIFT(ch_morphology_image) + + ch_image = RESOLIFT.out.enhanced_tiff + } + else { + + ch_image = ch_morphology_image + } + + + // run cellpose on the morphology (enhanced) tiff when cell_segmentation_only is set + if (cell_segmentation_only) { + + CELLPOSE_CELLS(ch_image, cellpose_model_path) + } + + if (nucleus_segmentation_only) { + + // Extract DAPI channel, run StarDist, convert to uint32 + EXTRACT_DAPI(ch_image) + + STARDIST_NUCLEI(EXTRACT_DAPI.out.dapi, [stardist_model, []]) + + CONVERT_MASK_UINT32(STARDIST_NUCLEI.out.mask) + } + + + // Always preprocess transcripts.parquet to CSV for Baysor 0.7.1 compatibility. + // Baysor's Julia Parquet.jl cannot read zstd-compressed parquet files from Xenium bundles. + // Also applies optional spatial/QV filtering when filter_transcripts is true.
+ BAYSOR_PREPROCESS_TRANSCRIPTS( + ch_transcripts_file, + min_qv, + max_x, + min_x, + max_y, + min_y, + ) + ch_transcripts = BAYSOR_PREPROCESS_TRANSCRIPTS.out.transcripts_file + + + // run baysor with the StarDist nuclei mask, the cellpose cell mask, or no mask (the nucleus branch takes precedence when both flags are set) + if (nucleus_segmentation_only) { + + // check if the size of the segmentation mask matches the max transcripts coordinate range + ch_resizetif_input = ch_transcripts + .combine(CONVERT_MASK_UINT32.out.mask, by: 0) + .combine(ch_experiment_metadata, by: 0) + .map { meta, transcripts, mask, exp_meta -> + tuple( + meta, + transcripts, + mask, + exp_meta, + ) + } + RESIZE_TIF(ch_resizetif_input) + + // run baysor with nuclei mask + ch_baysor_input = ch_transcripts + .combine(RESIZE_TIF.out.resized_mask, by: 0) + .combine(ch_config) + .map { meta, transcripts, mask, config -> + tuple( + meta, + transcripts, + mask, + config, + 30, + ) + } + BAYSOR_RUN(ch_baysor_input) + } + else if (cell_segmentation_only) { + + // check if the size of the segmentation mask matches the max transcripts coordinate range + ch_resizetif_input = ch_transcripts + .combine(CELLPOSE_CELLS.out.mask, by: 0) + .combine(ch_experiment_metadata, by: 0) + .map { meta, transcripts, mask, exp_meta -> + tuple( + meta, + transcripts, + mask, + exp_meta, + ) + } + RESIZE_TIF(ch_resizetif_input) + + // run baysor with cell mask + ch_baysor_input = ch_transcripts + .combine(RESIZE_TIF.out.resized_mask, by: 0) + .combine(ch_config) + .map { meta, transcripts, mask, config -> + tuple( + meta, + transcripts, + mask, + config, + 30, + ) + } + BAYSOR_RUN(ch_baysor_input) + } + else { + + // run baysor without cell/nuclei mask + ch_baysor_input = ch_transcripts + .combine(ch_config) + .map { meta, transcripts, config -> + tuple( + meta, + transcripts, + [], + config, + 30, + ) + } + BAYSOR_RUN(ch_baysor_input) + } + + + // run import-segmentation with baysor outs (units arrive via combine instead of a blocking .val read) + ch_imp_seg_inputs = ch_bundle_path + .combine(BAYSOR_RUN.out.segmentation, by: 0) + .combine(ch_coordinate_space) + .map { meta, bundle, segmentation_csv, polygons2d, coord_space
-> + tuple( + meta, + bundle, + segmentation_csv, + polygons2d, + [], + [], + [], + coord_space, + ) + } + + XENIUMRANGER_IMPORT_SEGMENTATION(ch_imp_seg_inputs) + + emit: + coordinate_space = ch_coordinate_space // channel: [ val("microns") ] + redefined_bundle = XENIUMRANGER_IMPORT_SEGMENTATION.out.outs // channel: [ val(meta), ["redefined-xenium-bundle"] ] +} diff --git a/subworkflows/local/cellpose_baysor_import_segmentation/meta.yml b/subworkflows/local/cellpose_baysor_import_segmentation/meta.yml new file mode 100644 index 00000000..0c18e495 --- /dev/null +++ b/subworkflows/local/cellpose_baysor_import_segmentation/meta.yml @@ -0,0 +1,86 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "cellpose_baysor_import_segmentation" +description: | + combine image-based segmentation approach with cellpose and integrate results from coordinate-based segmentation + through baysor to run import-segmentation in micron coordinate space +keywords: + - baysor + - cellpose + - baysor run + - segmentation + - xeniumranger import-segmentation + - image-based segmentation + - coordinate-based segmentation + - transcript filtering + - polygons +components: + - cellpose + - resolift + - resize/tif + - get/transcripts/coordinates + - baysor/preprocess/transcripts + - baysor/run + - xeniumranger/import/segmentation + - split/transcripts +input: + - ch_morphology_image: + description: | + path to the morphology.ome.tif file + Structure: [ val(meta), path("path-to-morphology.ome.tif") ] + - ch_bundle_path: + description: | + path to the xenium bundle + Structure: [ val(meta), path("path-to-xenium-bundle") ] + - ch_transcripts_parquet: + description: | + input parquet file from the xenium bundle + Structure: [ val(meta), path("path-to-transcripts.parquet") ] + - ch_config: + description: | + config file for the xenium baysor run (stored in assets/config/xenium.toml) + Structure: [
path("path-to-xenium.toml") ] +output: + - cells_mask: + description: | + cell segmentation mask generated by running Cellpose with the cpsam algorithm + Structure: [ val(meta), path("*masks.tif") ] + - cells_flows: + description: | + cell flows generated by running Cellpose with the cpsam algorithm + Structure: [ val(meta), path("*flows.tif") ] + - cells_cells: + description: | + cell segmentation mask as a numpy array generated by running Cellpose with the cpsam algorithm + Structure: [ val(meta), path("*seg.npy") ] + - nuclei_mask: + description: | + nuclei segmentation mask generated by running Cellpose with the nuclei algorithm + Structure: [ val(meta), path("*masks.tif") ] + - nuclei_flows: + description: | + nuclei flows generated by running Cellpose with the nuclei algorithm + Structure: [ val(meta), path("*masks.tif") ] + - nuclei_cells: + description: | + nuclei segmentation mask as a numpy array generated by running Cellpose with the nuclei algorithm + Structure: [ val(meta), path("*seg.npy") ] + - segmentation: + description: | + the segmentation.csv file generated from the baysor run command + Structure: [ val(meta), path("*segmentation.csv") ] + - polygons2d: + description: | + the segmentation_polygons_2d.json file generated from the baysor run command + Structure: [ val(meta), path("*segmentation_polygons_2d.json") ] + - coordinate_space: + description: | + the coordinate space in which xeniumranger import-segmentation was run + Structure: [ val("microns") ] + - redefined_bundle: + description: | + the redefined xenium bundle generated with the segmentation results from baysor + Structure: [ val(meta), ["redefined-xenium-bundle"] ] +authors: + - "@khersameesh24" +maintainers: + - "@khersameesh24" diff --git a/subworkflows/local/cellpose_resolift_morphology_ome_tif/main.nf b/subworkflows/local/cellpose_resolift_morphology_ome_tif/main.nf new file mode 100644 index 00000000..6bb38ded --- /dev/null +++ 
b/subworkflows/local/cellpose_resolift_morphology_ome_tif/main.nf @@ -0,0 +1,141 @@ +// +// Run cellpose (cells) and StarDist (nuclei) on the morphology tiff, then xeniumranger import-segmentation in pixel space +// + +include { RESOLIFT } from '../../../modules/local/resolift/main' +include { DOWNSCALE_MORPHOLOGY } from '../../../modules/local/utility/downscale_morphology/main' +include { UPSCALE_MASK as UPSCALE_CELLS } from '../../../modules/local/utility/upscale_mask/main' +include { CELLPOSE as CELLPOSE_CELLS } from '../../../modules/nf-core/cellpose/main' +include { EXTRACT_DAPI } from '../../../modules/local/utility/extract_dapi/main' +include { STARDIST as STARDIST_NUCLEI } from '../../../modules/nf-core/stardist/main' +include { CONVERT_MASK_UINT32 } from '../../../modules/local/utility/convert_mask_uint32/main' +include { XENIUMRANGER_IMPORT_SEGMENTATION } from '../../../modules/nf-core/xeniumranger/import-segmentation/main' + +workflow CELLPOSE_RESOLIFT_MORPHOLOGY_OME_TIF { + take: + ch_morphology_image // channel: [ val(meta), ["path-to-morphology.ome.tiff"] ] + ch_bundle_path // channel: [ val(meta), ["path-to-xenium-bundle"] ] + cellpose_downscale // value: bool — downscale the image before cellpose and upscale the cell mask afterwards + cellpose_model // value: path to cellpose model (or null) + nucleus_segmentation_only // value: bool — skip cellpose and import the nuclei mask only + sharpen_tiff // value: bool — run RESOLIFT on the morphology image first + stardist_nuclei_model // value: stardist pretrained model name (falls back to '2D_versatile_fluo') + + main: + + ch_imp_seg_inputs = channel.empty() + ch_coordinate_space = channel.value("pixels") + + // Use empty list when no model is provided; path input for official cellpose module + cellpose_model_path = cellpose_model ?
file(cellpose_model) : [] + stardist_model = stardist_nuclei_model ?: '2D_versatile_fluo' + + // sharpen morphology tiff if param - sharpen_tiff is true + if (sharpen_tiff) { + + RESOLIFT(ch_morphology_image) + + ch_image = RESOLIFT.out.enhanced_tiff + } + else { + + ch_image = ch_morphology_image + } + + // Optional pre-downscale for large images to avoid cellpose OOM + // Only needed when running cellpose for cells (not nucleus_segmentation_only) + if (cellpose_downscale && !nucleus_segmentation_only) { + + DOWNSCALE_MORPHOLOGY(ch_image) + + ch_cellpose_input = DOWNSCALE_MORPHOLOGY.out.downscaled + ch_scale_info = DOWNSCALE_MORPHOLOGY.out.scale_info + } + else { + + ch_cellpose_input = ch_image + ch_scale_info = channel.empty() + } + + // run cellpose on morphology tiff (or downscaled version) + if (!nucleus_segmentation_only) { + CELLPOSE_CELLS(ch_cellpose_input, cellpose_model_path) + } + + // StarDist for nuclei — always runs; extract DAPI from ch_image (never the downscaled copy) first + EXTRACT_DAPI(ch_image) + + STARDIST_NUCLEI(EXTRACT_DAPI.out.dapi, [stardist_model, []]) + + // Convert StarDist mask to uint32 for XeniumRanger compatibility + CONVERT_MASK_UINT32(STARDIST_NUCLEI.out.mask) + + ch_nuclei_mask = CONVERT_MASK_UINT32.out.mask + + // Upscale cellpose cells mask back to original resolution if downscaled + // StarDist nuclei mask is already at original resolution (no upscale needed) + if (cellpose_downscale) { + + if (!nucleus_segmentation_only) { + ch_cells_for_upscale = CELLPOSE_CELLS.out.mask + .combine(ch_scale_info, by: 0) + UPSCALE_CELLS(ch_cells_for_upscale) + ch_cells_mask = UPSCALE_CELLS.out.upscaled_mask + } + } + else { + + if (!nucleus_segmentation_only) { + ch_cells_mask = CELLPOSE_CELLS.out.mask + } + } + + // run import-segmentation with the nuclei mask only, or with both the cell and nuclei masks + if (nucleus_segmentation_only) { + + ch_imp_seg_inputs = ch_bundle_path + .combine(ch_nuclei_mask, by: 0) + .combine(ch_coordinate_space) + .map { meta, bundle, nuclei_seg, coord_space -> + tuple( +
meta, + bundle, + [], + [], + nuclei_seg, + [], + [], + coord_space, + ) + } + XENIUMRANGER_IMPORT_SEGMENTATION( + ch_imp_seg_inputs + ) + } + else { + + ch_imp_seg_inputs = ch_bundle_path + .combine(ch_cells_mask, by: 0) + .combine(ch_nuclei_mask, by: 0) + .combine(ch_coordinate_space) + .map { meta, bundle, cells_seg, nuclei_seg, coord_space -> + tuple( + meta, + bundle, + [], + [], + nuclei_seg, + cells_seg, + [], + coord_space, + ) + } + XENIUMRANGER_IMPORT_SEGMENTATION( + ch_imp_seg_inputs + ) + } + + emit: + coordinate_space = ch_coordinate_space // channel: [ "pixels" ] + redefined_bundle = XENIUMRANGER_IMPORT_SEGMENTATION.out.outs // channel: [ val(meta), ["redefined-xenium-bundle"] ] +} diff --git a/subworkflows/local/cellpose_resolift_morphology_ome_tif/meta.yml b/subworkflows/local/cellpose_resolift_morphology_ome_tif/meta.yml new file mode 100644 index 00000000..b0c99c85 --- /dev/null +++ b/subworkflows/local/cellpose_resolift_morphology_ome_tif/meta.yml @@ -0,0 +1,59 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "cellpose_resolift_morphology_ome_tif" +description: | + image-based segmentation approach with cellpose to run import-segmentation in pixel coordinate space +keywords: + - cellpose + - segmentation + - xeniumranger import-segmentation + - image-based segmentation +components: + - cellpose + - resolift + - xeniumranger/import/segmentation +input: + - ch_morphology_image: + description: | + path to the morphology.ome.tif file + Structure: [ val(meta), path("path-to-morphology.ome.tif") ] + - ch_bundle_path: + description: | + path to the xenium bundle + Structure: [ val(meta), path("path-to-xenium-bundle") ] +output: + - cells_mask: + description: | + cell segmentation mask generated by running Cellpose with the cpsam algorithm + Structure: [ val(meta), path("*masks.tif") ] + - cells_flows: + description: | + cell flows generated by running Cellpose with the
cpsam algorithm + Structure: [ val(meta), path("*flows.tif") ] + - cells_cells: + description: | + cell segmentation mask as a numpy array generated by running Cellpose with the cpsam algorithm + Structure: [ val(meta), path("*seg.npy") ] + - nuclei_mask: + description: | + nuclei segmentation mask generated by running Cellpose with the nuclei algorithm + Structure: [ val(meta), path("*masks.tif") ] + - nuclei_flows: + description: | + nuclei flows generated by running Cellpose with the nuclei algorithm + Structure: [ val(meta), path("*masks.tif") ] + - nuclei_cells: + description: | + nuclei segmentation mask as a numpy array generated by running Cellpose with the nuclei algorithm + Structure: [ val(meta), path("*seg.npy") ] + - coordinate_space: + description: | + the coordinate space in which xeniumranger import-segmentation was run + Structure: [ val("pixels") ] + - redefined_bundle: + description: | + the redefined xenium bundle generated with the segmentation results from baysor + Structure: [ val(meta), ["redefined-xenium-bundle"] ] +authors: + - "@khersameesh24" +maintainers: + - "@khersameesh24" diff --git a/subworkflows/local/ficture_preprocess_model/main.nf b/subworkflows/local/ficture_preprocess_model/main.nf new file mode 100644 index 00000000..c45713ec --- /dev/null +++ b/subworkflows/local/ficture_preprocess_model/main.nf @@ -0,0 +1,39 @@ +// +// Convert transcripts.parquet to CSV, then run the ficture preprocess and model modules +// + +include { FICTURE_PREPROCESS } from '../../../modules/local/ficture/preprocess/main' +include { FICTURE } from '../../../modules/local/ficture/model/main' +include { PARQUET_TO_CSV } from '../../../modules/local/utility/parquet_to_csv/main' + + + +workflow FICTURE_PREPROCESS_MODEL { + take: + ch_transcripts_file // channel: [ val(meta), [ "transcripts.parquet" ] ] + ch_features // channel: [ ["features"] ] + features // value: path to features list (or null) + + main: + + // convert parquet to csv + PARQUET_TO_CSV(ch_transcripts_file, ".csv") + + // run ficture
preprocessing + ch_transcripts = PARQUET_TO_CSV.out.transcripts_csv + + FICTURE_PREPROCESS(ch_transcripts, ch_features) + + // run the ficture wrapper pipeline; the cleaned feature list is forwarded only when `features` is set, otherwise an empty list + ch_features_clean = features ? FICTURE_PREPROCESS.out.features : channel.value([]) + FICTURE( + FICTURE_PREPROCESS.out.transcripts, + FICTURE_PREPROCESS.out.coordinate_minmax, + ch_features_clean, + ) + emit: + transcripts = FICTURE_PREPROCESS.out.transcripts // channel: [ val(meta), [ "*processed_transcripts.tsv.gz" ] ] + coordinate_minmax = FICTURE_PREPROCESS.out.coordinate_minmax // channel: [ "*coordinate_minmax.tsv" ] + features = FICTURE_PREPROCESS.out.features // channel: [ "*feature.clean.tsv.gz" ] + results = FICTURE.out.results // channel: [ val(meta), [ "results/**" ] ] +} diff --git a/subworkflows/local/ficture_preprocess_model/meta.yml b/subworkflows/local/ficture_preprocess_model/meta.yml new file mode 100644 index 00000000..17840559 --- /dev/null +++ b/subworkflows/local/ficture_preprocess_model/meta.yml @@ -0,0 +1,34 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "ficture_preprocess_model" +description: Scalable segmentation-free analysis of sub-micron resolution spatial transcriptomics +keywords: + - ficture + - ficture preprocess + - segmentation-free analysis + - pixel level factor analysis +components: + - ficture/preprocess + - ficture + - parquet/to/csv +input: + - ch_transcripts_parquet: + description: | + file containing the molecular or pixel level information, the required columns are X, Y, gene, and Count + Structure: [ val(meta), path("transcripts.parquet") ] + - ch_features: + description: | + unique names of genes that should be used in analysis + Structure: [ [gene1,gene2,gene3,gene4] ] +output: + - transcripts: + description: xyz + - coordinate_minmax: + description: xyz + - features: + description: xyz + - results: + description: xyz +authors: + - "@khersameesh24" +maintainers: + -
"@khersameesh24" diff --git a/subworkflows/local/opt_flip_track_stat/main.nf b/subworkflows/local/opt_flip_track_stat/main.nf new file mode 100644 index 00000000..839768d2 --- /dev/null +++ b/subworkflows/local/opt_flip_track_stat/main.nf @@ -0,0 +1,34 @@ +include { OPT_FLIP } from '../../../modules/nf-core/opt/flip/main' +include { OPT_TRACK } from '../../../modules/nf-core/opt/track/main' +include { OPT_STAT } from '../../../modules/nf-core/opt/stat/main' + + +workflow OPT_FLIP_TRACK_STAT { + take: + ch_probe_fasta // channel: [ val(meta), [ "panel_probes_sequences.fasta" ] ] + ch_references // channel: [ val(meta), ["reference_annotations.gff"], ["reference_annotations.fa"] ] + ch_gene_synonyms // channel: [ "path-to-gene-synonyms" ] + + main: + + ch_versions = channel.empty() + ch_summary = channel.empty() + + // correct probes that are aligning to opposite strand with `flip` + OPT_FLIP(ch_probe_fasta, ch_references) + ch_versions = ch_versions.mix(OPT_FLIP.out.versions) + + // align the forward-oriented probe sequences from `flip` to the target transcriptome with `track` + OPT_TRACK(OPT_FLIP.out.fwd_oriented_fa, ch_references) + ch_versions = ch_versions.mix(OPT_TRACK.out.versions) + + // summarize opt binding predictions with `stat` from the track output, the flipped probes and the gene synonyms + OPT_STAT(OPT_TRACK.out.probes2target, OPT_FLIP.out.fwd_oriented_fa, ch_gene_synonyms) + ch_versions = ch_versions.mix(OPT_STAT.out.versions) + + ch_summary = OPT_STAT.out.summary + + emit: + summary = ch_summary // channel: [ val(meta), ["collapsed_summary.tsv", "other-summary-files"]] + versions = ch_versions // channel: [ versions.yml ] (mixed from flip, track and stat) +} diff --git a/subworkflows/local/opt_flip_track_stat/meta.yml b/subworkflows/local/opt_flip_track_stat/meta.yml new file mode 100644 index 00000000..ea17599b --- /dev/null +++ b/subworkflows/local/opt_flip_track_stat/meta.yml @@ -0,0 +1,45 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "opt_flip_track_stat" +description: opt is a simple program that aligns probe
sequences to transcript sequences + to detect potential off-target probe activity +keywords: + - opt + - flip + - track + - stat + - off-target probes +components: + - opt/flip + - opt/track + - opt/stat +input: + - ch_probe_fasta: + type: file + description: | + Input channel containing the sample info. and associated probe panel sequences fasta file + Structure: [ val(meta), path("probe_panel_sequences.fasta") ] + pattern: "*.fasta" + - ch_references: + type: file + description: | + Input channel containing the sample info. and the references to be used + Structure: [ val(meta), path("reference_annotations.gff"), path("reference_annotations.fa") ] + pattern: "*.{fa,gff}" + - ch_gene_synonyms: + type: file + description: | + Input channel containing the Gene synonyms that may have been counted as off-targets but + simply differ in name (optional input) + Structure: [ val(meta), path("gene_synonyms.csv") ] + pattern: "*.csv" +output: + - summary: + type: file + description: | + Groovy Map containing summary of the forward oriented probes generated with the panel sequences opt flip and track + Structure: [ val(meta), path("collapsed_summary.tsv") ] + pattern: "*.tsv" +authors: + - "@khersameesh24" +maintainers: + - "@khersameesh24" diff --git a/subworkflows/local/proseg_preset_proseg2baysor/main.nf b/subworkflows/local/proseg_preset_proseg2baysor/main.nf new file mode 100644 index 00000000..3f9d8c99 --- /dev/null +++ b/subworkflows/local/proseg_preset_proseg2baysor/main.nf @@ -0,0 +1,50 @@ +// +// Runs proseg for the xenium format and proseg2baysor to generate cell polygons +// + +include { PROSEG } from '../../../modules/local/proseg/preset/main' +include { PROSEG2BAYSOR } from '../../../modules/local/proseg/proseg2baysor/main' +include { XENIUMRANGER_IMPORT_SEGMENTATION } from '../../../modules/nf-core/xeniumranger/import-segmentation/main' + +workflow PROSEG_PRESET_PROSEG2BAYSOR { + take: + ch_bundle_path // channel: [ val(meta), ["path-to-xenium-bundle"] ] +
ch_transcripts_file // channel: [ val(meta), [ "transcripts.parquet" ] ] + + main: + + ch_coordinate_space = channel.value("microns") + + // run proseg with the xenium format + PROSEG(ch_transcripts_file) + + + // run proseg-to-baysor on the zarr output from proseg v3 + PROSEG2BAYSOR(PROSEG.out.zarr) + + + // run xeniumranger import-segmentation + ch_imp_seg_inputs = ch_bundle_path + .combine(PROSEG2BAYSOR.out.xr_metadata, by: 0) + .combine(PROSEG2BAYSOR.out.xr_polygons, by: 0) + .map { meta, bundle, metadata, polygons2d -> + tuple( + meta, + bundle, + metadata, + polygons2d, + [], + [], + [], + ch_coordinate_space.val, + ) + } + + XENIUMRANGER_IMPORT_SEGMENTATION( + ch_imp_seg_inputs + ) + + emit: + coordinate_space = ch_coordinate_space // channel: [ "microns" ] + redefined_bundle = XENIUMRANGER_IMPORT_SEGMENTATION.out.outs // channel: [ val(meta), ["redefined-xenium-bundle"] ] +} diff --git a/subworkflows/local/proseg_preset_proseg2baysor/meta.yml b/subworkflows/local/proseg_preset_proseg2baysor/meta.yml new file mode 100644 index 00000000..6eb3f091 --- /dev/null +++ b/subworkflows/local/proseg_preset_proseg2baysor/meta.yml @@ -0,0 +1,48 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "proseg_preset_proseg2baysor" +description: to run proseg with the transcripts.parquet file as a coordinate-based segmentation run +keywords: + - proseg + - segmentation + - xeniumranger import-segmentation + - coordinate-based segmentation + - polygons + - metadata +components: + - proseg + - proseg2baysor + - xeniumranger/import/segmentation +input: + - ch_bundle_path: + description: | + path to the xenium bundle + Structure: [ val(meta), path("path-to-xenium-bundle") ] + - ch_transcripts_parquet: + description: | + input parquet file from the xenium bundle + Structure: [ val(meta), path("path-to-transcripts.parquet") ] +output: + - cell_polygons_2d: + description: | + the cell-polygons.geojson.gz 
file generated from proseg + Structure: [ val(meta), path("cell-polygons.geojson.gz") ] + - xr_polygons: + description: | + xeniumranger-compatible polygon file generated from the proseg command + Structure: [ val(meta), path("xr-cell-polygons.geojson") ] + - xr_metadata: + description: | + xeniumranger-compatible metadata file generated from the proseg command + Structure: [ val(meta), path("xr-transcript-metadata.csv") ] + - coordinate_space: + description: | + the coordinate space in which xeniumranger import-segmentation was run + Structure: [ val("microns") ] + - redefined_bundle: + description: | + the redefined xenium bundle generated with the segmentation results from proseg + Structure: [ val(meta), ["redefined-xenium-bundle"] ] +authors: + - "@khersameesh24" +maintainers: + - "@khersameesh24" diff --git a/subworkflows/local/proseg_preset_proseg2baysor_tiled/main.nf b/subworkflows/local/proseg_preset_proseg2baysor_tiled/main.nf new file mode 100644 index 00000000..7ff6b783 --- /dev/null +++ b/subworkflows/local/proseg_preset_proseg2baysor_tiled/main.nf @@ -0,0 +1,86 @@ +// +// Runs proseg with tiling: divide transcripts -> proseg per patch -> proseg2baysor -> stitch -> xeniumranger +// + +include { XENIUM_PATCH_DIVIDE } from '../../../modules/local/xenium_patch/divide/main' +include { PROSEG } from '../../../modules/local/proseg/preset/main' +include { PROSEG2BAYSOR } from '../../../modules/local/proseg/proseg2baysor/main' +include { XENIUM_PATCH_STITCH } from '../../../modules/local/xenium_patch/stitch/main' +include { XENIUMRANGER_IMPORT_SEGMENTATION } from '../../../modules/nf-core/xeniumranger/import-segmentation/main' + +workflow PROSEG_PRESET_PROSEG2BAYSOR_TILED { + + take: + ch_bundle_path // channel: [ val(meta), ["path-to-xenium-bundle"] ] + ch_transcripts_file // channel: [ val(meta), [ "transcripts.parquet" ] ] + + main: + + ch_coordinate_space = channel.value("microns") + + // Step 1: Divide transcripts into overlapping patches +
XENIUM_PATCH_DIVIDE ( ch_transcripts_file ) + + // Step 2: Fan out patches for parallel processing + // transpose() emits one item per patch file: [meta, parquet_path] + ch_patches = XENIUM_PATCH_DIVIDE.out.patch_transcripts + .transpose() + .map { meta, parquet_file -> + def patch_id = parquet_file.parent.name + def patch_meta = meta.clone() + patch_meta.sample_id = meta.id + patch_meta.patch_id = patch_id + patch_meta.id = "${meta.id}_${patch_id}" + tuple(patch_meta, parquet_file) + } + + // Step 3: Run proseg on each patch independently + PROSEG ( ch_patches ) + + // Step 4: Convert proseg output to baysor format per patch + PROSEG2BAYSOR ( PROSEG.out.zarr ) + + // Step 5: Gather patch results per sample for stitching + ch_for_stitch = PROSEG2BAYSOR.out.xr_polygons + .join(PROSEG2BAYSOR.out.xr_metadata, by: 0) + .map { patch_meta, geojson, csv -> + tuple(patch_meta.sample_id, [patch_meta.patch_id, csv, geojson]) + } + .groupTuple(by: 0) + .map { sample_id, patch_data -> + def sorted = patch_data.sort { it -> it[0] } + def patch_ids = sorted.collect { it -> it[0] } + def csvs = sorted.collect { it -> it[1] } + def geojsons = sorted.collect { it -> it[2] } + tuple(sample_id, patch_ids, csvs, geojsons) + } + + // Combine with grid metadata from DIVIDE + ch_stitch_input = ch_for_stitch + .join( + XENIUM_PATCH_DIVIDE.out.grid + .map { meta, grid -> tuple(meta.id, grid) } + ) + .map { sample_id, patch_ids, csvs, geojsons, grid_json -> + def meta = [id: sample_id] + tuple(meta, grid_json, patch_ids, csvs, geojsons) + } + + // Step 6: Stitch patch results into unified segmentation output + XENIUM_PATCH_STITCH ( ch_stitch_input ) + + // Step 7: Run xeniumranger import-segmentation + // Note: Cell size filtering is handled inline by STITCH via --filter-method + ch_xr = ch_bundle_path + .combine(XENIUM_PATCH_STITCH.out.xr_polygons_transcript, by: 0) + .combine(ch_coordinate_space) + .map { meta, bundle, geojson, csv, coord_space -> + tuple(meta, bundle, csv, geojson, [], 
[], [], coord_space) + } + + XENIUMRANGER_IMPORT_SEGMENTATION ( ch_xr ) + + emit: + coordinate_space = ch_coordinate_space // channel: [ "microns" ] + redefined_bundle = XENIUMRANGER_IMPORT_SEGMENTATION.out.outs // channel: [ val(meta), ["redefined-xenium-bundle"] ] +} diff --git a/subworkflows/local/segger_create_train_predict/main.nf b/subworkflows/local/segger_create_train_predict/main.nf new file mode 100644 index 00000000..3f832d61 --- /dev/null +++ b/subworkflows/local/segger_create_train_predict/main.nf @@ -0,0 +1,77 @@ +// +// Run segger create_dataset, train and predict modules & parquet_to_csv +// + +include { SEGGER2XR } from '../../../modules/local/utility/segger2xr/main' +include { SEGGER_TRAIN } from '../../../modules/local/segger/train/main' +include { SEGGER_PREDICT } from '../../../modules/local/segger/predict/main' +include { SEGGER_CREATE_DATASET } from '../../../modules/local/segger/create_dataset/main' +include { XENIUMRANGER_IMPORT_SEGMENTATION } from '../../../modules/nf-core/xeniumranger/import-segmentation/main' + +workflow SEGGER_CREATE_TRAIN_PREDICT { + take: + ch_bundle // channel: [ val(meta), ["path-to-xenium-bundle"] ] + ch_transcripts_file // channel: [ val(meta), [bundle + "/transcripts.parquet"]] + segger_model // value: path to a pre-trained segger model checkpoint (or null) + + main: + + // Note: spatialaxe uses "pixels" but per 10x docs, transcript-based segmentation + // (like Baysor/Segger) must use "microns" since Xenium coordinates are in microns + ch_coordinate_space = channel.value("microns") + + // create dataset (always needed for predict step) + SEGGER_CREATE_DATASET(ch_bundle) + + // Determine model source and join all PREDICT inputs by meta. + // Without meta-based join, queue channels align by emission order, + // which is non-deterministic and causes cross-sample input mispairing. 
+ if (segger_model) { + // Use pre-trained model - skip training + def model_path = file(segger_model) + ch_predict_paired = SEGGER_CREATE_DATASET.out.datasetdir + .join(ch_transcripts_file) + .map { meta, dataset, tx -> [meta, dataset, model_path, tx] } + } else { + // Train a new model per sample, join all inputs by meta + SEGGER_TRAIN(SEGGER_CREATE_DATASET.out.datasetdir) + ch_predict_paired = SEGGER_CREATE_DATASET.out.datasetdir + .join(SEGGER_TRAIN.out.trained_models) + .join(ch_transcripts_file) + } + // ch_predict_paired: [meta, dataset_dir, models_dir, transcripts] + + SEGGER_PREDICT( + ch_predict_paired.map { meta, dataset, _m, _tx -> [meta, dataset] }, + ch_predict_paired.map { _meta, _dataset, models, _tx -> models }, + ch_predict_paired.map { _meta, _dataset, _m, tx -> [tx] }, + ) + // convert parquet to XR compatible form + SEGGER2XR(SEGGER_PREDICT.out.transcripts) + + // run xeniumranger import-segmentation with Baysor-format CSV + viz polygons + // xeniumranger 4.0 expects Baysor CSV (with is_noise column) for --transcript-assignment + ch_imp_seg_inputs = ch_bundle + .combine(SEGGER2XR.out.segmentation_csv, by: 0) + .combine(SEGGER2XR.out.viz_polygons, by: 0) + .map { meta, bundle, segmentation_csv, polygons -> + tuple( + meta, + bundle, + segmentation_csv, // transcript_assignment (Baysor-format CSV) + polygons, // viz_polygons (GeoJSON cell boundaries) + [], // nuclei + [], // cells + [], // coordinate_transform + ch_coordinate_space.val, + ) + } + + XENIUMRANGER_IMPORT_SEGMENTATION( + ch_imp_seg_inputs + ) + + emit: + coordinate_space = ch_coordinate_space // channel: [ "microns" ] + redefined_bundle = XENIUMRANGER_IMPORT_SEGMENTATION.out.outs // channel: [ val(meta), ["redefined-xenium-bundle"] ] +} diff --git a/subworkflows/local/segger_create_train_predict/meta.yml b/subworkflows/local/segger_create_train_predict/meta.yml new file mode 100644 index 00000000..34d59770 --- /dev/null +++ b/subworkflows/local/segger_create_train_predict/meta.yml @@ 
-0,0 +1,55 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "segger_create_train_predict" +description: | + segger is a cutting-edge tool for cell segmentation in single-molecule spatial omics datasets, + subworkflow is the implementation of segger modules run in the recommended sequence +keywords: + - segger + - segmentation + - xeniumranger import-segmentation + - coordinate-based segmentation +components: + - segger + - segger/create/dataset + - segger/train + - segger/predict + - segger2xr + - xeniumranger/import/segmentation +input: + - ch_bundle_path: + description: | + Directory containing the raw dataset - xenium bundle (e.g., transcripts, boundaries). + Structure: [ val(meta), path("path-to-xenium-bundle") ] + - ch_transcripts_parquet: + description: | + input parquet file from the xenium bundle + Structure: [ val(meta), path("path-to-transcripts.parquet") ] +output: + - datasetdir: + description: | + Directory generated by the segger create dataset module + Structure: [ val(meta), path(datasetdir) ] + - trained_models: + description: | + The model trained on the data by the segger training module + Structure: [ val(meta), path(trained_models) ] + - benchmarks: + description: | + benchmarks generated from the segger training and prediction steps + Structure: [ val(meta), path(benchmarks) ] + - segger_transcripts: + description: | + transcripts parquet file generated after segger prediction containing the segger ids + Structure: [ path(transcripts.parquet) ] + - coordinate_space: + description: | + the coordinate space in which xeniumranger import-segmentation was run + Structure: [ val("microns") ] + - redefined_bundle: + description: | + the redefined xenium bundle generated with the segmentation results from segger + Structure: [ val(meta), ["redefined-xenium-bundle"] ] +authors: + - "@khersameesh24" +maintainers: + - "@khersameesh24" diff --git
a/subworkflows/local/spatialdata_write_meta_merge/main.nf b/subworkflows/local/spatialdata_write_meta_merge/main.nf new file mode 100644 index 00000000..24ce9e90 --- /dev/null +++ b/subworkflows/local/spatialdata_write_meta_merge/main.nf @@ -0,0 +1,81 @@ +// +// generate spatialdata object from the spatialaxe layers +// + +include { SPATIALDATA_META } from '../../../modules/local/spatialdata/meta/main' +include { SPATIALDATA_WRITE as SPATIALDATA_WRITE_RAW_BUNDLE } from '../../../modules/local/spatialdata/write/main' +include { SPATIALDATA_MERGE as SPATIALDATA_MERGE_RAW_REDEFINED } from '../../../modules/local/spatialdata/merge/main' +include { SPATIALDATA_WRITE as SPATIALDATA_WRITE_REDEFINED_BUNDLE } from '../../../modules/local/spatialdata/write/main' + +workflow SPATIALDATA_WRITE_META_MERGE { + take: + ch_bundle_path // channel: [ val(meta), [ "path-to-xenium-bundle" ] ] + ch_redefined_bundle // channel: [ val(meta), [ "redefined-xenium-bundle" ] ] + ch_coordinate_space // channel: [ "pixels" or "microns" ] + cell_segmentation_only // value: bool + mode // value: pipeline mode (image/coordinate/...) 
+ nucleus_segmentation_only // value: bool + + main: + + ch_segmented_object = channel.empty() + + // check segmentation - only nuclei, cells or both cells & nuclei + if (mode == 'image') { + + if (nucleus_segmentation_only && cell_segmentation_only) { + ch_segmented_object = channel.value('cells_and_nuclei') + } + else if (nucleus_segmentation_only) { + ch_segmented_object = channel.value('nuclei') + } + else if (cell_segmentation_only) { + ch_segmented_object = channel.value('cells') + } + else { + ch_segmented_object = channel.value([]) + } + } + + // set all boundaries as false - default + if (mode == 'coordinate') { + ch_segmented_object = channel.value([]) + } + + // write spatialdata object from the raw xenium bundle + SPATIALDATA_WRITE_RAW_BUNDLE( + ch_bundle_path, + 'raw_bundle', + ch_segmented_object, + ch_coordinate_space, + ) + + + // write spatialdata object after running IMP_SEG + SPATIALDATA_WRITE_REDEFINED_BUNDLE( + ch_redefined_bundle, + 'redefined_bundle', + ch_segmented_object, + ch_coordinate_space, + ) + + + // merge raw & redefined spatialdata objects + SPATIALDATA_MERGE_RAW_REDEFINED( + SPATIALDATA_WRITE_RAW_BUNDLE.out.spatialdata.combine(ch_redefined_bundle, by: 0), + 'merged_bundle' + ) + + + // write metadata with spatialdata object + SPATIALDATA_META( + SPATIALDATA_MERGE_RAW_REDEFINED.out.merged_bundle.combine(ch_bundle_path, by: 0), + 'metadata' + ) + + emit: + sd_raw_bundle = SPATIALDATA_WRITE_RAW_BUNDLE.out.spatialdata // channel: [ val(meta), "spatialdata_raw" ] + sd_redefined_bundle = SPATIALDATA_WRITE_REDEFINED_BUNDLE.out.spatialdata // channel: [ val(meta), "spatialdata_redefined" ] + sd_merged_bundle = SPATIALDATA_MERGE_RAW_REDEFINED.out.merged_bundle // channel: [ val(meta), "spatialdata_merged" ] + sd_metadata = SPATIALDATA_META.out.metadata // channel: [ val(meta), "spatialdata_meta" ] +} diff --git a/subworkflows/local/spatialdata_write_meta_merge/meta.yml b/subworkflows/local/spatialdata_write_meta_merge/meta.yml new file 
mode 100644 index 00000000..0168d010 --- /dev/null +++ b/subworkflows/local/spatialdata_write_meta_merge/meta.yml @@ -0,0 +1,50 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "spatialdata_write_meta_merge" +description: | + SpatialData is a data framework that comprises a FAIR storage format and a collection of python libraries for + performant access, alignment, and processing of uni- and multi-modal spatial omics datasets +keywords: + - spatialdata + - xenium + - xeniumranger import-segmentation + - spatialdata object + - metadata + - merge spatialdata objects +components: + - spatialdata/write + - spatialdata/meta + - spatialdata/merge +input: + - ch_bundle_path: + description: | + Directory containing the raw dataset + Structure: [ val(meta), path("path-to-xenium-bundle") ] + - ch_redefined_bundle: + description: | + Directory containing the redefined xenium bundle after running xeniumranger import-segmentation + Structure: [ val(meta), path("redefined-xenium-bundle") ] + - ch_coordinate_space: + description: | + the coordinate space in which xeniumranger import-segmentation was run + Structure: [ val("pixels" or "microns") ] +output: + - sd_raw_bundle: + description: | + spatialdata object generated from the raw xenium bundle + Structure: [ val(meta), path(spatialdata_raw) ] + - sd_redefined_bundle: + description: | + spatialdata object generated from the redefined xenium bundle + Structure: [ val(meta), path(spatialdata_redefined) ] + - sd_merged_bundle: + description: | + spatialdata object generated from merging the spatialdata objects from raw and redefined xenium bundles + Structure: [ val(meta), path(spatialdata_merged) ] + - sd_metadata: + description: | + spatialdata object containing the metadata info. 
+ Structure: [ val(meta), path(spatialdata_meta) ] +authors: + - "@khersameesh24" +maintainers: + - "@khersameesh24" diff --git a/subworkflows/local/stardist_resolift_morphology_ome_tif/main.nf b/subworkflows/local/stardist_resolift_morphology_ome_tif/main.nf new file mode 100644 index 00000000..bc255409 --- /dev/null +++ b/subworkflows/local/stardist_resolift_morphology_ome_tif/main.nf @@ -0,0 +1,70 @@ +// +// Run stardist nuclei segmentation on the morphology tiff +// + +include { RESOLIFT } from '../../../modules/local/resolift/main' +include { EXTRACT_DAPI } from '../../../modules/local/utility/extract_dapi/main' +include { STARDIST as STARDIST_NUCLEI } from '../../../modules/nf-core/stardist/main' +include { CONVERT_MASK_UINT32 } from '../../../modules/local/utility/convert_mask_uint32/main' +include { XENIUMRANGER_IMPORT_SEGMENTATION } from '../../../modules/nf-core/xeniumranger/import-segmentation/main' + +workflow STARDIST_RESOLIFT_MORPHOLOGY_OME_TIF { + take: + ch_morphology_image // channel: [ val(meta), ["path-to-morphology.ome.tiff"] ] + ch_bundle_path // channel: [ val(meta), ["path-to-xenium-bundle"] ] + sharpen_tiff // value: bool + stardist_nuclei_model // value: stardist pretrained model name + + main: + + ch_imp_seg_inputs = channel.empty() + ch_coordinate_space = channel.value("pixels") + + // Use default model when no model is provided + stardist_model = stardist_nuclei_model ?: '2D_versatile_fluo' + + // sharpen morphology tiff if param - sharpen_tiff is true + if (sharpen_tiff) { + + RESOLIFT(ch_morphology_image) + + ch_image = RESOLIFT.out.enhanced_tiff + } + else { + + ch_image = ch_morphology_image + } + + // Extract DAPI channel for StarDist (expects single-channel input) + EXTRACT_DAPI(ch_image) + + // Run StarDist nuclei segmentation on DAPI channel + STARDIST_NUCLEI(EXTRACT_DAPI.out.dapi, [stardist_model, []]) + + // Convert mask to uint32 for XeniumRanger compatibility + CONVERT_MASK_UINT32(STARDIST_NUCLEI.out.mask) + + // Run
import-segmentation with nuclei only + // XeniumRanger expands nuclei by expansion_distance to create cell boundaries + ch_imp_seg_inputs = ch_bundle_path + .combine(CONVERT_MASK_UINT32.out.mask, by: 0) + .map { meta, bundle, nuclei_seg -> + tuple( + meta, + bundle, + [], + [], + nuclei_seg, + [], + [], + ch_coordinate_space.val, + ) + } + XENIUMRANGER_IMPORT_SEGMENTATION( + ch_imp_seg_inputs + ) + + emit: + coordinate_space = ch_coordinate_space // channel: [ ["pixels"] ] + redefined_bundle = XENIUMRANGER_IMPORT_SEGMENTATION.out.outs // channel: [ val(meta), ["redefined-xenium-bundle"] ] +} diff --git a/subworkflows/local/utils_nfcore_spatialaxe_pipeline/main.nf b/subworkflows/local/utils_nfcore_spatialaxe_pipeline/main.nf new file mode 100644 index 00000000..801174e2 --- /dev/null +++ b/subworkflows/local/utils_nfcore_spatialaxe_pipeline/main.nf @@ -0,0 +1,351 @@ +// +// Subworkflow with functionality specific to the nf-core/spatialaxe pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { samplesheetToList } from 'plugin/nf-schema' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW TO INITIALISE PIPELINE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + 
+workflow PIPELINE_INITIALISATION { + take: + version // boolean: Display version and exit + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + monochrome_logs // boolean: Do not use coloured log outputs + nextflow_cli_args // array: List of positional nextflow CLI args + outdir // string: The output directory where the results will be saved + input // string: Path to input samplesheet + help // boolean: Display help message and exit + help_full // boolean: Show the full help message + show_hidden // boolean: Show hidden parameters in the help message + format // string: input data platform (xenium | cosmx | merscope) + gene_panel // string: path to gene panel + gene_synonyms // string: path to gene synonyms + image_seg_methods // list: valid image-mode segmentation methods + method // string: chosen segmentation method + mode // string: pipeline mode + nucleus_segmentation_only // boolean + offtarget_probe_tracking // boolean + probes_fasta // string: path to probes fasta + reference_annotations // string: path to reference annotations + relabel_genes // boolean + segmentation_mask // string: path to segmentation mask + transcript_seg_methods // list: valid coordinate-mode segmentation methods + + main: + + // + // Print version and exit if required and dump pipeline parameters to JSON file + // + UTILS_NEXTFLOW_PIPELINE( + version, + true, + outdir, + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1, + ) + + // + // Validate parameters and generate parameter summary to stdout + // + before_text = """ +-\033[2m----------------------------------------------------\033[0m- + \033[0;32m,--.\033[0;30m/\033[0;32m,-.\033[0m +\033[0;34m ___ __ __ __ ___ \033[0;32m/,-._.--~\'\033[0m +\033[0;34m |\\ | |__ __ / ` / \\ |__) |__ \033[0;33m} {\033[0m +\033[0;34m | \\| | \\__, \\__/ | \\ |___ \033[0;32m\\`-._,-`-,\033[0m + \033[0;32m`._,._,\'\033[0m +\033[0;35m nf-core/spatialaxe 
${workflow.manifest.version}\033[0m +-\033[2m----------------------------------------------------\033[0m- +""" + after_text = """${workflow.manifest.doi ? "\n* The pipeline\n" : ""}${workflow.manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/', '')}" }.join("\n")}${workflow.manifest.doi ? "\n" : ""} +* The nf-core framework + https://doi.org/10.1038/s41587-020-0439-x + +* Software dependencies + https://github.com/nf-core/spatialaxe/blob/master/CITATIONS.md +""" + command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.csv --mode --outdir " + + UTILS_NFSCHEMA_PLUGIN( + workflow, + validate_params, + null, + help, + help_full, + show_hidden, + before_text, + after_text, + command, + ) + + // + // Check config provided to the pipeline + // + UTILS_NFCORE_PIPELINE( + nextflow_cli_args + ) + + // + // Custom validation for pipeline parameters + // + validateInputParameters( + input, + mode, + method, + format, + image_seg_methods, + transcript_seg_methods, + relabel_genes, + gene_panel, + nucleus_segmentation_only, + segmentation_mask, + offtarget_probe_tracking, + probes_fasta, + reference_annotations, + gene_synonyms, + ) + log.info("✅ Pipeline parameters validated.") + + // + // Create channel from input file provided through --input + // + try { + + channel.fromList(samplesheetToList(input, "${projectDir}/assets/schema_input.json")) + .map { meta, bundle, image -> + return [[id: meta.id], bundle, image] + } + .set { ch_samplesheet } + + log.info("✅ Samplesheet validated.") + } + catch (Exception e) { + + error("❌ Samplesheet validation failed: ${e.message}") + } + + + // Xenium bundle file-presence validation now runs in the main workflow + // (workflows/spatialaxe.nf) AFTER UNTAR staging, so it works uniformly for + // both directory inputs and tarball inputs. 
+ + emit: + samplesheet = ch_samplesheet +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW FOR PIPELINE COMPLETION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow PIPELINE_COMPLETION { + take: + email // string: email address + email_on_fail // string: email address sent on pipeline failure + plaintext_email // boolean: Send plain-text email instead of HTML + outdir // path: Path to output directory where results will be published + monochrome_logs // boolean: Disable ANSI colour codes in log output + hook_url // string: hook URL for notifications + multiqc_report // string: Path to MultiQC report + + main: + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + def multiqc_reports = multiqc_report.toList() + + // + // Completion email and summary + // + workflow.onComplete { + if (email || email_on_fail) { + completionEmail( + summary_params, + email, + email_on_fail, + plaintext_email, + outdir, + monochrome_logs, + multiqc_reports.getVal(), + ) + } + + completionSummary(monochrome_logs) + if (hook_url) { + imNotification(summary_params, hook_url) + } + } + + workflow.onError { + error("❌ Pipeline failed. 
Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting") + } +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ +// +// Check and validate pipeline parameters: calls error() on invalid profile/mode/method/format combinations and log.warn() on inconsistent optional parameters +// +def validateInputParameters( + input, + mode, + method, + format, + image_seg_methods, + transcript_seg_methods, + relabel_genes, + gene_panel, + nucleus_segmentation_only, + segmentation_mask, + offtarget_probe_tracking, + probes_fasta, + reference_annotations, + gene_synonyms +) { + + // check if conda profile is provided + if (workflow.profile.contains('conda')) { + error("❌ Error: `nf-core/spatialaxe` does not support running the pipeline with profile: conda ") + } + + // check if the samplesheet provided with the test config is assets/samplesheet.csv + if (workflow.profile.contains('test') && !"${input}".endsWith("assets/samplesheet.csv")) { + error("❌ Error: Use the samplesheet at: ${projectDir}/assets/samplesheet.csv with `--input` when running the pipeline in test profile.") + } + + // check if the segmentation method provided is valid for a mode + if (mode == 'image' && method) { + if (!image_seg_methods.contains(method)) { + error("❌ Error: Invalid segmentation method: ${method} provided for the `image` based mode. Options: ${image_seg_methods}") + } + } + + if (mode == 'coordinate' && method) { + if (!transcript_seg_methods.contains(method)) { + error("❌ Error: Invalid segmentation method: `${method}` provided for the `coordinate` based mode.
Options: ${transcript_seg_methods}") + } + } + + // check method-format compatibility (schema enum constrains the universe; this enforces the method-specific subset) + def valid_segger_formats = ['xenium'] + def valid_proseg_formats = ['xenium', 'cosmx', 'merscope'] + if (method == 'segger' && !(format in valid_segger_formats)) { + error("❌ Error: Invalid --format '${format}' for segger. Valid: ${valid_segger_formats}") + } + if (method == 'proseg' && !(format in valid_proseg_formats)) { + error("❌ Error: Invalid --format '${format}' for proseg. Valid: ${valid_proseg_formats}") + } + + // check if --relabel_genes is true but --gene_panel is not provided + if (relabel_genes && !gene_panel) { + log.warn("⚠️ Relabel genes is enabled, but gene panel is not provided with the `--gene_panel`. Using `gene_panel.json` in the xenium bundle.") + } + + // check if --gene_panel is provided but --relabel_genes is disabled + if (gene_panel && !relabel_genes) { + log.warn("⚠️ Gene panel provided, but relabel genes is disabled. Using `gene_panel.json` only to generate metadata.") + } + + // check if segmentation method is xeniumranger and nucleus_segmentation_only is disabled + if (method == 'xeniumranger' && !nucleus_segmentation_only) { + log.warn("⚠️ Nucleus segmentation is disabled. Running xeniumranger resegment module to redefine xenium bundle without nucleus segmentation.") + log.warn("⚠️ Use --nucleus_segmentation_only to enable nucleus segmentation to redefine xenium bundle with import-segmentation module.") + } + + // check if segmentation mask is provided in image mode and baysor method + if (mode == 'image' && method == 'baysor') { + if (!segmentation_mask) { + log.warn("⚠️ Missing segmentation mask with `--segmentation_mask` when pipeline is run in ${mode} and with the ${method}.
Running in coordinate mode.") + } + } + + // off-target probe tracking requested without --mode always fails: missing inputs are reported first, otherwise the user is pointed to --mode qc + if (!mode && offtarget_probe_tracking) { + if(!probes_fasta || !reference_annotations || !gene_synonyms) { + error("❌ Error: Missing required param(s) for off-target-probe detection.") + } + error("❌ Error: Use --mode qc and --offtarget_probe_tracking to run off-target probe tracking.") + } +} + +// +// Generate methods description for MultiQC +// +def toolCitationText() { + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "MultiQC (Ewels et al. 2016)", + ".", + ].join(' ').trim() + + return citation_text +} + +def toolBibliographyText() { + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " + ].join(' ').trim() + + return reference_text +} + +def methodsDescriptionText(mqc_methods_yaml) { + // Convert to a named map so can be used as with familiar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = workflow.toMap() + meta["manifest_map"] = workflow.manifest.toMap() + + // Pipeline DOI + if (meta.manifest_map.doi) { + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + def temp_doi_ref = "" + def manifest_doi = meta.manifest_map.doi.tokenize(",") + manifest_doi.each { doi_ref -> + temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), " + } + meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2) + } + else { + meta["doi_text"] = "" + } + meta["nodoi_text"] = meta.manifest_map.doi ? "" : "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " + + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + // meta["tool_bibliography"] = toolBibliographyText() + + def methods_text = mqc_methods_yaml.text + + def engine = new groovy.text.SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html.toString() +} diff --git a/subworkflows/local/utils_nfcore_spatialxe_pipeline/main.nf b/subworkflows/local/utils_nfcore_spatialxe_pipeline/main.nf deleted file mode 100644 index c60249c8..00000000 --- a/subworkflows/local/utils_nfcore_spatialxe_pipeline/main.nf +++ /dev/null @@ -1,295 +0,0 @@ -// -// Subworkflow with functionality specific to the nf-core/spatialxe pipeline -// - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -include { UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin' -include { paramsSummaryMap } from 'plugin/nf-schema' -include { samplesheetToList } from 'plugin/nf-schema' -include { paramsHelp } from 'plugin/nf-schema' -include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' -include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' -include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' -include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUBWORKFLOW TO INITIALISE PIPELINE -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow PIPELINE_INITIALISATION { - - 
take: - version // boolean: Display version and exit - validate_params // boolean: Boolean whether to validate parameters against the schema at runtime - monochrome_logs // boolean: Do not use coloured log outputs - nextflow_cli_args // array: List of positional nextflow CLI args - outdir // string: The output directory where the results will be saved - input // string: Path to input samplesheet - help // boolean: Display help message and exit - help_full // boolean: Show the full help message - show_hidden // boolean: Show hidden parameters in the help message - - main: - - ch_versions = channel.empty() - - // - // Print version and exit if required and dump pipeline parameters to JSON file - // - UTILS_NEXTFLOW_PIPELINE ( - version, - true, - outdir, - workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 - ) - - // - // Validate parameters and generate parameter summary to stdout - // - - def before_text = "" - def after_text = "" - before_text = """ --\033[2m----------------------------------------------------\033[0m- - \033[0;32m,--.\033[0;30m/\033[0;32m,-.\033[0m -\033[0;34m ___ __ __ __ ___ \033[0;32m/,-._.--~\'\033[0m -\033[0;34m |\\ | |__ __ / ` / \\ |__) |__ \033[0;33m} {\033[0m -\033[0;34m | \\| | \\__, \\__/ | \\ |___ \033[0;32m\\`-._,-`-,\033[0m - \033[0;32m`._,._,\'\033[0m -\033[0;35m nf-core/spatialxe ${workflow.manifest.version}\033[0m --\033[2m----------------------------------------------------\033[0m- -""" - after_text = """${workflow.manifest.doi ? "\n* The pipeline\n" : ""}${workflow.manifest.doi.tokenize(",").collect { doi -> " https://doi.org/${doi.trim().replace('https://doi.org/','')}"}.join("\n")}${workflow.manifest.doi ? 
"\n" : ""} -* The nf-core framework - https://doi.org/10.1038/s41587-020-0439-x - -* Software dependencies - https://github.com/nf-core/spatialxe/blob/master/CITATIONS.md -""" - if (monochrome_logs) { - before_text = before_text.replaceAll(/\033\[[0-9;]*m/, '') - } - - command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.csv --outdir " - - UTILS_NFSCHEMA_PLUGIN ( - workflow, - validate_params, - null, - help, - help_full, - show_hidden, - before_text, - after_text, - command - ) - - // - // Check config provided to the pipeline - // - UTILS_NFCORE_PIPELINE ( - nextflow_cli_args - ) - - // - // Custom validation for pipeline parameters - // - validateInputParameters() - - // - // Create channel from input file provided through params.input - // - - channel - .fromList(samplesheetToList(input, "${projectDir}/assets/schema_input.json")) - .map { - meta, fastq_1, fastq_2 -> - if (!fastq_2) { - return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] - } else { - return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] - } - } - .groupTuple() - .map { samplesheet -> - validateInputSamplesheet(samplesheet) - } - .map { - meta, fastqs -> - return [ meta, fastqs.flatten() ] - } - .set { ch_samplesheet } - - emit: - samplesheet = ch_samplesheet - versions = ch_versions -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUBWORKFLOW FOR PIPELINE COMPLETION -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow PIPELINE_COMPLETION { - - take: - email // string: email address - email_on_fail // string: email address sent on pipeline failure - plaintext_email // boolean: Send plain-text email instead of HTML - outdir // path: Path to output directory where results will be published - monochrome_logs // boolean: Disable ANSI colour codes in log output - multiqc_report // string: Path to MultiQC report - - main: - summary_params = 
paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") - def multiqc_reports = multiqc_report.toList() - - // - // Completion email and summary - // - workflow.onComplete { - if (email || email_on_fail) { - completionEmail( - summary_params, - email, - email_on_fail, - plaintext_email, - outdir, - monochrome_logs, - multiqc_reports.getVal(), - ) - } - - completionSummary(monochrome_logs) - - } - - workflow.onError { - log.error "Pipeline failed. Please refer to troubleshooting docs for common issues: https://nf-co.re/docs/running/troubleshooting" - } -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - FUNCTIONS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -// -// Check and validate pipeline parameters -// -def validateInputParameters() { - genomeExistsError() -} - -// -// Validate channels from input samplesheet -// -def validateInputSamplesheet(input) { - def (metas, fastqs) = input[1..2] - - // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end - def endedness_ok = metas.collect{ meta -> meta.single_end }.unique().size == 1 - if (!endedness_ok) { - error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}") - } - - return [ metas[0], fastqs ] -} -// -// Get attribute from genome config file e.g. 
fasta -// -def getGenomeAttribute(attribute) { - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - return params.genomes[ params.genome ][ attribute ] - } - } - return null -} - -// -// Exit pipeline if incorrect --genome key provided -// -def genomeExistsError() { - if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + - " Currently, the available genome keys are:\n" + - " ${params.genomes.keySet().join(", ")}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - error(error_string) - } -} -// -// Generate methods description for MultiQC -// -def toolCitationText() { - // TODO nf-core: Optionally add in-text citation tools to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report - def citation_text = [ - "Tools used in the workflow included:", - "FastQC (Andrews 2010),", - "MultiQC (Ewels et al. 2016)", - "." - ].join(' ').trim() - - return citation_text -} - -def toolBibliographyText() { - // TODO nf-core: Optionally add bibliographic entries to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report - def reference_text = [ - "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • ", - "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " - ].join(' ').trim() - - return reference_text -} - -def methodsDescriptionText(mqc_methods_yaml) { - // Convert to a named map so can be used as with familiar NXF ${workflow} variable syntax in the MultiQC YML file - def meta = [:] - meta.workflow = workflow.toMap() - meta["manifest_map"] = workflow.manifest.toMap() - - // Pipeline DOI - if (meta.manifest_map.doi) { - // Using a loop to handle multiple DOIs - // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers - // Removing ` ` since the manifest.doi is a string and not a proper list - def temp_doi_ref = "" - def manifest_doi = meta.manifest_map.doi.tokenize(",") - manifest_doi.each { doi_ref -> - temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), " - } - meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2) - } else meta["doi_text"] = "" - meta["nodoi_text"] = meta.manifest_map.doi ? "" : "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " - - // Tool references - meta["tool_citations"] = "" - meta["tool_bibliography"] = "" - - // TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! - // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") - // meta["tool_bibliography"] = toolBibliographyText() - - - def methods_text = mqc_methods_yaml.text - - def engine = new groovy.text.SimpleTemplateEngine() - def description_html = engine.createTemplate(methods_text).make(meta) - - return description_html.toString() -} diff --git a/subworkflows/local/xeniumranger_import_segmentation_redefine_bundle/main.nf b/subworkflows/local/xeniumranger_import_segmentation_redefine_bundle/main.nf new file mode 100644 index 00000000..4c7b41d5 --- /dev/null +++ b/subworkflows/local/xeniumranger_import_segmentation_redefine_bundle/main.nf @@ -0,0 +1,133 @@ +// +// Run xeniumranger import-segmentation +// + +include { XENIUMRANGER_IMPORT_SEGMENTATION as IMP_SEG_COUNT_MATRIX_EXP_DISTANCE } from '../../../modules/nf-core/xeniumranger/import-segmentation/main' +include { XENIUMRANGER_IMPORT_SEGMENTATION as IMP_SEG_POLYGON_GEOJSON_INPUT } from '../../../modules/nf-core/xeniumranger/import-segmentation/main' +include { XENIUMRANGER_IMPORT_SEGMENTATION as IMP_SEG_TRANS_MATRIX_INPUT } from '../../../modules/nf-core/xeniumranger/import-segmentation/main' + + +workflow XENIUMRANGER_IMPORT_SEGMENTATION_REDEFINE_BUNDLE { + take: + ch_bundle_path // channel: [ val(meta), [ "path-to-xenium-bundle" ] ] + alignment_csv // value: path to alignment csv (or null) + expansion_distance // value: nuclear expansion distance + nucleus_segmentation_only // value: bool + qupath_polygons // value: path to qupath polygons dir (or null) + + main: + + ch_versions = channel.empty() + ch_redefined_bundle = channel.empty() + ch_coordinate_space = channel.empty() + + cells = ch_bundle_path.map { meta, bundle -> + return [meta, bundle + 
"/cells.zarr.zip"] + } + + // scenario - 1 change nuclear expansion distance / create a nucleus-only count matrix(--expansion_distance=0) + if (expansion_distance == 0 || expansion_distance != 5) { + ch_coordinate_space = "microns" + ch_imp_seg_inputs = ch_bundle_path + .combine(cells, by: 0) + .map { meta, bundle, cells_zarr -> + tuple( + meta, + bundle, + [], + [], + cells_zarr, + [], + [], + ch_coordinate_space.val, + ) + } + + IMP_SEG_COUNT_MATRIX_EXP_DISTANCE( + ch_imp_seg_inputs + ) + ch_redefined_bundle = IMP_SEG_COUNT_MATRIX_EXP_DISTANCE.out.outs + } + + // scenario - 2 polygon input - geojson format (from QuPath) + if (qupath_polygons && nucleus_segmentation_only) { + + ch_coordinate_space = "microns" + ch_imp_seg_inputs = ch_bundle_path + .combine(qupath_polygons) + .map { meta, bundle, polygons_geojson -> + tuple( + meta, + bundle, + [], + [], + polygons_geojson, + [], + [], + ch_coordinate_space.val, + ) + } + + IMP_SEG_POLYGON_GEOJSON_INPUT( + ch_imp_seg_inputs + ) + ch_redefined_bundle = IMP_SEG_POLYGON_GEOJSON_INPUT.out.outs + } + else if (qupath_polygons) { + + ch_coordinate_space = "microns" + ch_imp_seg_inputs = ch_bundle_path + .combine(qupath_polygons) + .map { meta, bundle, polygons_geojson -> + tuple( + meta, + bundle, + [], + [], + polygons_geojson, + polygons_geojson, + [], + ch_coordinate_space.val, + ) + } + + IMP_SEG_POLYGON_GEOJSON_INPUT( + ch_imp_seg_inputs + ) + ch_redefined_bundle = IMP_SEG_POLYGON_GEOJSON_INPUT.out.outs + } + + // scenario 3 - mask input - included in the cellpose subworkflow + + // scenario 4 - transcript assignment input - included in the baysor & proseg subworkflows + + // scenario 5 - transformation matrix input + if (qupath_polygons && alignment_csv) { + + ch_imp_seg_inputs = ch_bundle_path + .combine(qupath_polygons) + .combine(alignment_csv) + .map { meta, bundle, polygons_geojson, alignment_csv_file -> + tuple( + meta, + bundle, + [], + [], + polygons_geojson, + polygons_geojson, + alignment_csv_file, + 
ch_coordinate_space.val, + ) + } + + IMP_SEG_TRANS_MATRIX_INPUT( + ch_imp_seg_inputs + ) + ch_redefined_bundle = IMP_SEG_TRANS_MATRIX_INPUT.out.outs + } + + emit: + redefined_bundle = ch_redefined_bundle // channel: [ val(meta), ["redefined-xenium-bundle"] ] + coordinate_space = ch_coordinate_space // channel: [ ["pixels"] ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/xeniumranger_import_segmentation_redefine_bundle/meta.yml b/subworkflows/local/xeniumranger_import_segmentation_redefine_bundle/meta.yml new file mode 100644 index 00000000..fb040abc --- /dev/null +++ b/subworkflows/local/xeniumranger_import_segmentation_redefine_bundle/meta.yml @@ -0,0 +1,32 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "xeniumranger_import_segmentation_redefine_bundle" +description: | + The import-segmentation pipeline allows you to specify 2D nuclei and/or cell segmentation results to use for + assigning transcripts to cells and recalculate all Xenium Onboard Analysis (XOA) outputs that depend on segmentation. 
+keywords: + - xenium + - xeniumranger import-segmentation + - qupath + - expansion distance + - segmentation + - polygons +components: + - xeniumranger/import/segmentation +input: + - ch_bundle_path: + description: | + Directory containing the raw dataset + Structure: [ val(meta), path("path-to-xenium-bundle") ] +output: + - redefined_bundle: + description: | + the redefined xenium bundle generated with the segmentation results from baysor + Structure: [ val(meta), ["redefined-xenium-bundle"] ] + - coordinate_space: + description: | + the coordinate space in which xeniumranger import-segmentation was run + Structure: [ val("microns") ] +authors: + - "@khersameesh24" +maintainers: + - "@khersameesh24" diff --git a/subworkflows/local/xeniumranger_relabel_resegment/main.nf b/subworkflows/local/xeniumranger_relabel_resegment/main.nf new file mode 100644 index 00000000..424224a7 --- /dev/null +++ b/subworkflows/local/xeniumranger_relabel_resegment/main.nf @@ -0,0 +1,29 @@ +// +// run xeniumranger relabel & resegment to redine the xenium bundle +// + +include { XENIUMRANGER_RELABEL } from '../../../modules/nf-core/xeniumranger/relabel/main' +include { XENIUMRANGER_RESEGMENT } from '../../../modules/nf-core/xeniumranger/resegment/main' + +workflow XENIUMRANGER_RELABEL_RESEGMENT { + take: + ch_bundle_path // channel: [ val(meta), [ "path-to-xenium-bundle" ] ] + ch_gene_panel // channel: [ val(meta), ["path-to-gene_panel.json"] ] + + main: + + ch_versions = channel.empty() + + // Combine bundle path with gene panel into a single tuple for relabel + XENIUMRANGER_RELABEL( + ch_bundle_path.combine(ch_gene_panel, by: 0), + ) + + XENIUMRANGER_RESEGMENT( + XENIUMRANGER_RELABEL.out.outs + ) + + emit: + redefined_bundle = XENIUMRANGER_RESEGMENT.out.outs // channel: [ val(meta), ["redefined-xenium-bundle"] ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/xeniumranger_relabel_resegment/meta.yml 
b/subworkflows/local/xeniumranger_relabel_resegment/meta.yml new file mode 100644 index 00000000..5195335e --- /dev/null +++ b/subworkflows/local/xeniumranger_relabel_resegment/meta.yml @@ -0,0 +1,33 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "xeniumranger_relabel_resegment" +description: | + The relabel pipeline allows you to change the gene labels applied to decoded transcripts. + The resegment pipeline allows you to generate a new segmentation of the morphology image space by rerunning the + Xenium Onboard Analysis (XOA) segmentation algorithms with modified parameters. +keywords: + - xenium + - xeniumranger resegment + - expansion distance + - gene panel + - relabel +components: + - xeniumranger/relabel + - xeniumranger/resegment +input: + - ch_bundle_path: + description: | + Directory containing the raw dataset + Structure: [ val(meta), path("path-to-xenium-bundle") ] + - ch_gene_panel: + description: | + The gene panel JSON file to use for relabeling decoded transcripts + Structure: [ path("path-to-gene_panel.json") ] +output: + - redefined_bundle: + description: | + the redefined xenium bundle generated with the segmentation results from baysor + Structure: [ val(meta), ["redefined-xenium-bundle"] ] +authors: + - "@khersameesh24" +maintainers: + - "@khersameesh24" diff --git a/subworkflows/local/xeniumranger_resegment_morphology_ome_tif/main.nf b/subworkflows/local/xeniumranger_resegment_morphology_ome_tif/main.nf new file mode 100644 index 00000000..bcd97584 --- /dev/null +++ b/subworkflows/local/xeniumranger_resegment_morphology_ome_tif/main.nf @@ -0,0 +1,61 @@ +// +// Run xeniumranger resegment +// + +include { XENIUMRANGER_RESEGMENT } from '../../../modules/nf-core/xeniumranger/resegment/main' +include { XENIUMRANGER_IMPORT_SEGMENTATION } from '../../../modules/nf-core/xeniumranger/import-segmentation/main' + +workflow XENIUMRANGER_RESEGMENT_MORPHOLOGY_OME_TIF { + 
take: + ch_bundle_path // channel: [ val(meta), ["path-to-xenium-bundle"] ] + nucleus_segmentation_only // value: bool + + main: + + ch_redefined_bundle = channel.empty() + ch_coordinate_space = channel.value("pixels") + + // run resegment with changed config values + XENIUMRANGER_RESEGMENT(ch_bundle_path) + + + // run import segmentation to redine xenium bundle along with nuclear segmentation + // Keep meta in the cells channel for proper per-sample joining + def cells = XENIUMRANGER_RESEGMENT.out.outs.map { meta, bundle -> + return [meta, bundle + "/cells.zarr.zip"] + } + + // adjust the nuclear expansion distance without altering nuclei detection + if (nucleus_segmentation_only) { + + def ch_imp_seg_inputs = ch_bundle_path + .join(XENIUMRANGER_RESEGMENT.out.outs, by: 0) + .join(cells, by: 0) + .map { meta, bundle, _reseg_bundle, cells_zarr -> + tuple( + meta, + bundle, + [], + [], + [], + cells_zarr, + [], + "pixels", + ) + } + + XENIUMRANGER_IMPORT_SEGMENTATION( + ch_imp_seg_inputs + ) + + ch_redefined_bundle = XENIUMRANGER_IMPORT_SEGMENTATION.out.outs + } + else { + + ch_redefined_bundle = XENIUMRANGER_RESEGMENT.out.outs + } + + emit: + redefined_bundle = ch_redefined_bundle // channel: [ val(meta), ["redefined-xenium-bundle"] ] + coordinate_space = ch_coordinate_space // channel: [ ["pixels"] ] +} diff --git a/subworkflows/local/xeniumranger_resegment_morphology_ome_tif/meta.yml b/subworkflows/local/xeniumranger_resegment_morphology_ome_tif/meta.yml new file mode 100644 index 00000000..3af4f3aa --- /dev/null +++ b/subworkflows/local/xeniumranger_resegment_morphology_ome_tif/meta.yml @@ -0,0 +1,33 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "xeniumranger_resegment_morphology_ome_tif" +description: | + The resegment pipeline allows you to generate a new segmentation of the morphology image space by rerunning the + Xenium Onboard Analysis (XOA) segmentation algorithms with 
modified parameters. +keywords: + - xenium + - xeniumranger resegment + - xeniumranger import-segmentation + - expansion distance + - cell segmentation + - nucleus segmentation +components: + - xeniumranger/import/segmentation + - xeniumranger/resegment +input: + - ch_bundle_path: + description: | + Directory containing the raw dataset + Structure: [ val(meta), path("path-to-xenium-bundle") ] +output: + - redefined_bundle: + description: | + the redefined xenium bundle generated with the segmentation results from baysor + Structure: [ val(meta), ["redefined-xenium-bundle"] ] + - coordinate_space: + description: | + the coordinate space in which xeniumranger import-segmentation was run + Structure: [ val("pixels") ] +authors: + - "@khersameesh24" +maintainers: + - "@khersameesh24" diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test index 68718e4f..897d6681 100644 --- a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test @@ -40,9 +40,9 @@ nextflow_function { } } - test("Test Function checkCondaChannels") { + test("Test Function checkCondachannels") { - function "checkCondaChannels" + function "checkCondachannels" then { assertAll( diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap index e3f0baf4..f03a352a 100644 --- a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap @@ -9,7 +9,7 @@ }, "timestamp": "2024-02-28T12:02:05.308243" }, - "Test Function checkCondaChannels": { + "Test Function checkCondachannels": { "content": null, "meta": { "nf-test": "0.8.4", diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf 
b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index afca5439..bfd25876 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -17,7 +17,7 @@ workflow UTILS_NFCORE_PIPELINE { checkProfileProvided(nextflow_cli_args) emit: - valid_config = valid_config + valid_config } /* @@ -98,7 +98,7 @@ def workflowVersionToYAML() { // Get channel of software versions used in pipeline in YAML format // def softwareVersionsToYAML(ch_versions) { - return ch_versions.unique().map { version -> processVersionsFromYAML(version) }.unique().mix(channel.of(workflowVersionToYAML())) + return ch_versions.unique().map { version -> processVersionsFromYAML(version) }.unique().mix(Channel.of(workflowVersionToYAML())) } // @@ -353,3 +353,67 @@ def completionSummary(monochrome_logs=true) { log.info("-${colors.purple}[${workflow.manifest.name}]${colors.red} Pipeline completed with errors${colors.reset}-") } } + +// +// Construct and send a notification to a web server as JSON e.g. 
Microsoft Teams and Slack +// +def imNotification(summary_params, hook_url) { + def summary = [:] + summary_params + .keySet() + .sort() + .each { group -> + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) { + misc_fields['repository'] = workflow.repository + } + if (workflow.commitId) { + misc_fields['commitid'] = workflow.commitId + } + if (workflow.revision) { + misc_fields['revision'] = workflow.revision + } + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = getWorkflowVersion() + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? 
"slackreport.json" : "adaptivecard.json" + def hf = new File("${workflow.projectDir}/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection() + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")) + def postRC = post.getResponseCode() + if (!postRC.equals(200)) { + log.warn(post.getErrorStream().getText()) + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.nf.test deleted file mode 100644 index 8940d32d..00000000 --- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.nf.test +++ /dev/null @@ -1,29 +0,0 @@ -nextflow_workflow { - - name "Test Workflow UTILS_NFCORE_PIPELINE" - script "../main.nf" - config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" - workflow "UTILS_NFCORE_PIPELINE" - tag "subworkflows" - tag "subworkflows_nfcore" - tag "utils_nfcore_pipeline" - tag "subworkflows/utils_nfcore_pipeline" - - test("Should run without failures") { - - when { - workflow { - """ - input[0] = [] - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert snapshot(workflow.out).match() } - ) - } - } -} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.nf.test.snap deleted file mode 100644 index 859d1030..00000000 --- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.nf.test.snap +++ /dev/null @@ -1,19 +0,0 @@ -{ - "Should run without failures": { - "content": [ - { - "0": [ - true - ], - "valid_config": [ - true - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T12:03:25.726491" - } -} \ No newline at end of file diff --git 
a/subworkflows/nf-core/utils_nfschema_plugin/main.nf b/subworkflows/nf-core/utils_nfschema_plugin/main.nf index 1df8b76f..ee4738c8 100644 --- a/subworkflows/nf-core/utils_nfschema_plugin/main.nf +++ b/subworkflows/nf-core/utils_nfschema_plugin/main.nf @@ -38,7 +38,7 @@ workflow UTILS_NFSCHEMA_PLUGIN { } log.info paramsHelp( help_options, - (params.help instanceof String && params.help != "true") ? params.help : "", + params.help instanceof String ? params.help : "", ) exit 0 } @@ -71,3 +71,4 @@ workflow UTILS_NFSCHEMA_PLUGIN { emit: dummy_emit = true } + diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config index f6537cc3..8d8c7371 100644 --- a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config @@ -1,5 +1,5 @@ plugins { - id "nf-schema@2.6.1" + id "nf-schema@2.5.1" } validation { diff --git a/tests/.nftignore b/tests/.nftignore index e128a128..2e57a91e 100644 --- a/tests/.nftignore +++ b/tests/.nftignore @@ -8,5 +8,5 @@ multiqc/multiqc_data/multiqc_software_versions.txt multiqc/multiqc_data/llms-full.txt multiqc/multiqc_plots/{svg,pdf,png}/*.{svg,pdf,png} multiqc/multiqc_report.html -fastqc/*_fastqc.{html,zip} pipeline_info/*.{html,json,txt,yml} +**/proseg/preset/** diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..a8eb3c10 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,9 @@ +"""Pytest configuration for nf-xenium-processing tests.""" + +from __future__ import annotations + +import sys +from pathlib import Path + +# Add bin/ to path so skill_*.py modules can be imported +sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "bin")) diff --git a/tests/coordinate_mode.nf.test b/tests/coordinate_mode.nf.test new file mode 100644 index 00000000..e0ff2ac4 --- /dev/null +++ b/tests/coordinate_mode.nf.test @@ -0,0 +1,36 @@ +nextflow_pipeline { + + 
name "Test pipeline for the `coordinate` mode, test run the proseg subworkflow" + script "../main.nf" + tag "pipeline" + config "../conf/test_coordinate_mode.config" + + test("-profile test stub") { + + options "-stub" + + when { + params { + outdir = "$outputDir" + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success}, + { assert snapshot( + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_spatialaxe_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } +} diff --git a/tests/coordinate_mode.nf.test.snap b/tests/coordinate_mode.nf.test.snap new file mode 100644 index 00000000..72a452ec --- /dev/null +++ b/tests/coordinate_mode.nf.test.snap @@ -0,0 +1,152 @@ +{ + "-profile test stub": { + "content": [ + { + "PROSEG2BAYSOR": { + "proseg": "3.1.0" + }, + "PROSEG": { + "proseg": "3.1.0" + }, + "SPATIALDATA_MERGE_RAW_REDEFINED": { + "spatialdata": "0.7.2" + }, + "SPATIALDATA_META": { + "spatialdata": "0.7.2" + }, + "SPATIALDATA_WRITE_RAW_BUNDLE": { + "spatialdata": "0.7.2" + }, + "SPATIALDATA_WRITE_REDEFINED_BUNDLE": { + "spatialdata": "0.7.2" + }, + "UNTAR": { + "untar": 1.34 + }, + "Workflow": { + "nf-core/spatialaxe": "v1.1.0dev" + }, + "XENIUMRANGER_IMPORT_SEGMENTATION": { + "xeniumranger": "4.0.1.1" + } + }, + [ + "coordinate", + "coordinate/multiqc", + "coordinate/multiqc/raw_bundle", + "coordinate/multiqc/raw_bundle/multiqc_data", + 
"coordinate/multiqc/raw_bundle/multiqc_data/.stub", + "coordinate/multiqc/raw_bundle/multiqc_plots", + "coordinate/multiqc/raw_bundle/multiqc_plots/.stub", + "coordinate/multiqc/raw_bundle/multiqc_report.html", + "coordinate/multiqc/redefined_bundle", + "coordinate/multiqc/redefined_bundle/multiqc_data", + "coordinate/multiqc/redefined_bundle/multiqc_data/.stub", + "coordinate/multiqc/redefined_bundle/multiqc_plots", + "coordinate/multiqc/redefined_bundle/multiqc_plots/.stub", + "coordinate/multiqc/redefined_bundle/multiqc_report.html", + "coordinate/proseg", + "coordinate/proseg/preset", + "coordinate/proseg/preset/test_run", + "coordinate/proseg/preset/test_run/cell-polygons.geojson.gz", + "coordinate/proseg/preset/test_run/proseg-output.zarr", + "coordinate/proseg/preset/test_run/transcript-metadata.csv.gz", + "coordinate/proseg/proseg2baysor", + "coordinate/proseg/proseg2baysor/test_run", + "coordinate/proseg/proseg2baysor/test_run/cell-polygons.geojson", + "coordinate/proseg/proseg2baysor/test_run/transcript-metadata.csv", + "coordinate/spatialdata", + "coordinate/spatialdata/merge", + "coordinate/spatialdata/merge/spatialdata", + "coordinate/spatialdata/merge/spatialdata/test_run", + "coordinate/spatialdata/merge/spatialdata/test_run/merged_bundle", + "coordinate/spatialdata/merge/spatialdata/test_run/merged_bundle/fake_file.txt", + "coordinate/spatialdata/meta", + "coordinate/spatialdata/meta/spatialdata", + "coordinate/spatialdata/meta/spatialdata/test_run", + "coordinate/spatialdata/meta/spatialdata/test_run/metadata", + "coordinate/spatialdata/meta/spatialdata/test_run/metadata/fake_file.txt", + "coordinate/spatialdata/write", + "coordinate/spatialdata/write/spatialdata", + "coordinate/spatialdata/write/spatialdata/test_run", + "coordinate/spatialdata/write/spatialdata/test_run/raw_bundle", + "coordinate/spatialdata/write/spatialdata/test_run/raw_bundle/fake_file.txt", + "coordinate/spatialdata/write/spatialdata/test_run/redefined_bundle", + 
"coordinate/spatialdata/write/spatialdata/test_run/redefined_bundle/fake_file.txt", + "coordinate/untar", + "coordinate/untar/test_run", + "coordinate/untar/test_run/.end-of-run", + "coordinate/untar/test_run/analysis.tar.gz", + "coordinate/untar/test_run/analysis.zarr.zip", + "coordinate/untar/test_run/analysis_summary.html", + "coordinate/untar/test_run/aux_outputs.tar.gz", + "coordinate/untar/test_run/cell_boundaries.csv.gz", + "coordinate/untar/test_run/cell_boundaries.parquet", + "coordinate/untar/test_run/cell_feature_matrix.h5", + "coordinate/untar/test_run/cell_feature_matrix.tar.gz", + "coordinate/untar/test_run/cell_feature_matrix.zarr.zip", + "coordinate/untar/test_run/cells.csv.gz", + "coordinate/untar/test_run/cells.parquet", + "coordinate/untar/test_run/cells.zarr.zip", + "coordinate/untar/test_run/experiment.xenium", + "coordinate/untar/test_run/gene_panel.json", + "coordinate/untar/test_run/metrics_summary.csv", + "coordinate/untar/test_run/morphology.ome.tif", + "coordinate/untar/test_run/morphology_focus", + "coordinate/untar/test_run/morphology_focus/morphology_focus_0000.ome.tif", + "coordinate/untar/test_run/nucleus_boundaries.csv.gz", + "coordinate/untar/test_run/nucleus_boundaries.parquet", + "coordinate/untar/test_run/transcripts.parquet", + "coordinate/untar/test_run/transcripts.zarr.zip", + "coordinate/xeniumranger", + "coordinate/xeniumranger/import_segementation", + "coordinate/xeniumranger/import_segementation/test_run", + "coordinate/xeniumranger/import_segementation/test_run/experiment.xenium", + "pipeline_info", + "pipeline_info/nf_core_spatialaxe_software_mqc_versions.yml" + ], + [ + ".stub:md5,d41d8cd98f00b204e9800998ecf8427e", + ".stub:md5,d41d8cd98f00b204e9800998ecf8427e", + "multiqc_report.html:md5,d41d8cd98f00b204e9800998ecf8427e", + ".stub:md5,d41d8cd98f00b204e9800998ecf8427e", + ".stub:md5,d41d8cd98f00b204e9800998ecf8427e", + "multiqc_report.html:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"cell-polygons.geojson:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcript-metadata.csv:md5,d41d8cd98f00b204e9800998ecf8427e", + "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + ".end-of-run:md5,d41d8cd98f00b204e9800998ecf8427e", + "analysis.tar.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "analysis.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + "analysis_summary.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "aux_outputs.tar.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "cell_boundaries.csv.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "cell_boundaries.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "cell_feature_matrix.h5:md5,d41d8cd98f00b204e9800998ecf8427e", + "cell_feature_matrix.tar.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "cell_feature_matrix.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + "cells.csv.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "cells.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "cells.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + "experiment.xenium:md5,d41d8cd98f00b204e9800998ecf8427e", + "gene_panel.json:md5,d41d8cd98f00b204e9800998ecf8427e", + "metrics_summary.csv:md5,d41d8cd98f00b204e9800998ecf8427e", + "morphology.ome.tif:md5,d41d8cd98f00b204e9800998ecf8427e", + "morphology_focus_0000.ome.tif:md5,d41d8cd98f00b204e9800998ecf8427e", + "nucleus_boundaries.csv.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "nucleus_boundaries.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcripts.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcripts.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + "experiment.xenium:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "timestamp": "2026-05-18T21:58:08.945192717", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + } +} \ No newline at end of file diff --git a/tests/default.nf.test b/tests/default.nf.test 
index d5000f57..84c71fc2 100644 --- a/tests/default.nf.test +++ b/tests/default.nf.test @@ -1,10 +1,13 @@ nextflow_pipeline { - name "Test pipeline" + name "Test pipeline - runs the default pipeline tests (coordinate mode)" script "../main.nf" tag "pipeline" + config "../conf/test.config" - test("-profile test") { + test("-profile test stub") { + + options "-stub" when { params { @@ -13,19 +16,19 @@ nextflow_pipeline { } then { - // stable_path: All files + folders in ${params.outdir}/ with a stable path (including file name) - def stable_path = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) - // stable_content: All files in ${params.outdir}/ with stable content - def stable_content = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') - assert workflow.success + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') assertAll( + { assert workflow.success}, { assert snapshot( // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions - removeNextflowVersion("$outputDir/pipeline_info/nf_core_spatialxe_software_mqc_versions.yml"), + removeNextflowVersion("$outputDir/pipeline_info/nf_core_spatialaxe_software_mqc_versions.yml"), // All stable path name, with a relative path - stable_path, + stable_name, // All files with stable contents - stable_content + stable_path ).match() } ) } diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap new file mode 100644 index 00000000..3289c2a0 --- /dev/null +++ b/tests/default.nf.test.snap @@ -0,0 +1,152 @@ +{ + "-profile test stub": { + "content": [ + { + 
"PROSEG2BAYSOR": { + "proseg": "3.1.0" + }, + "PROSEG": { + "proseg": "3.1.0" + }, + "SPATIALDATA_MERGE_RAW_REDEFINED": { + "spatialdata": "0.7.2" + }, + "SPATIALDATA_META": { + "spatialdata": "0.7.2" + }, + "SPATIALDATA_WRITE_RAW_BUNDLE": { + "spatialdata": "0.7.2" + }, + "SPATIALDATA_WRITE_REDEFINED_BUNDLE": { + "spatialdata": "0.7.2" + }, + "UNTAR": { + "untar": 1.34 + }, + "Workflow": { + "nf-core/spatialaxe": "v1.1.0dev" + }, + "XENIUMRANGER_IMPORT_SEGMENTATION": { + "xeniumranger": "4.0.1.1" + } + }, + [ + "coordinate", + "coordinate/multiqc", + "coordinate/multiqc/raw_bundle", + "coordinate/multiqc/raw_bundle/multiqc_data", + "coordinate/multiqc/raw_bundle/multiqc_data/.stub", + "coordinate/multiqc/raw_bundle/multiqc_plots", + "coordinate/multiqc/raw_bundle/multiqc_plots/.stub", + "coordinate/multiqc/raw_bundle/multiqc_report.html", + "coordinate/multiqc/redefined_bundle", + "coordinate/multiqc/redefined_bundle/multiqc_data", + "coordinate/multiqc/redefined_bundle/multiqc_data/.stub", + "coordinate/multiqc/redefined_bundle/multiqc_plots", + "coordinate/multiqc/redefined_bundle/multiqc_plots/.stub", + "coordinate/multiqc/redefined_bundle/multiqc_report.html", + "coordinate/proseg", + "coordinate/proseg/preset", + "coordinate/proseg/preset/test_run", + "coordinate/proseg/preset/test_run/cell-polygons.geojson.gz", + "coordinate/proseg/preset/test_run/proseg-output.zarr", + "coordinate/proseg/preset/test_run/transcript-metadata.csv.gz", + "coordinate/proseg/proseg2baysor", + "coordinate/proseg/proseg2baysor/test_run", + "coordinate/proseg/proseg2baysor/test_run/cell-polygons.geojson", + "coordinate/proseg/proseg2baysor/test_run/transcript-metadata.csv", + "coordinate/spatialdata", + "coordinate/spatialdata/merge", + "coordinate/spatialdata/merge/spatialdata", + "coordinate/spatialdata/merge/spatialdata/test_run", + "coordinate/spatialdata/merge/spatialdata/test_run/merged_bundle", + "coordinate/spatialdata/merge/spatialdata/test_run/merged_bundle/fake_file.txt", 
+ "coordinate/spatialdata/meta", + "coordinate/spatialdata/meta/spatialdata", + "coordinate/spatialdata/meta/spatialdata/test_run", + "coordinate/spatialdata/meta/spatialdata/test_run/metadata", + "coordinate/spatialdata/meta/spatialdata/test_run/metadata/fake_file.txt", + "coordinate/spatialdata/write", + "coordinate/spatialdata/write/spatialdata", + "coordinate/spatialdata/write/spatialdata/test_run", + "coordinate/spatialdata/write/spatialdata/test_run/raw_bundle", + "coordinate/spatialdata/write/spatialdata/test_run/raw_bundle/fake_file.txt", + "coordinate/spatialdata/write/spatialdata/test_run/redefined_bundle", + "coordinate/spatialdata/write/spatialdata/test_run/redefined_bundle/fake_file.txt", + "coordinate/untar", + "coordinate/untar/test_run", + "coordinate/untar/test_run/.end-of-run", + "coordinate/untar/test_run/analysis.tar.gz", + "coordinate/untar/test_run/analysis.zarr.zip", + "coordinate/untar/test_run/analysis_summary.html", + "coordinate/untar/test_run/aux_outputs.tar.gz", + "coordinate/untar/test_run/cell_boundaries.csv.gz", + "coordinate/untar/test_run/cell_boundaries.parquet", + "coordinate/untar/test_run/cell_feature_matrix.h5", + "coordinate/untar/test_run/cell_feature_matrix.tar.gz", + "coordinate/untar/test_run/cell_feature_matrix.zarr.zip", + "coordinate/untar/test_run/cells.csv.gz", + "coordinate/untar/test_run/cells.parquet", + "coordinate/untar/test_run/cells.zarr.zip", + "coordinate/untar/test_run/experiment.xenium", + "coordinate/untar/test_run/gene_panel.json", + "coordinate/untar/test_run/metrics_summary.csv", + "coordinate/untar/test_run/morphology.ome.tif", + "coordinate/untar/test_run/morphology_focus", + "coordinate/untar/test_run/morphology_focus/morphology_focus_0000.ome.tif", + "coordinate/untar/test_run/nucleus_boundaries.csv.gz", + "coordinate/untar/test_run/nucleus_boundaries.parquet", + "coordinate/untar/test_run/transcripts.parquet", + "coordinate/untar/test_run/transcripts.zarr.zip", + "coordinate/xeniumranger", + 
"coordinate/xeniumranger/import_segementation", + "coordinate/xeniumranger/import_segementation/test_run", + "coordinate/xeniumranger/import_segementation/test_run/experiment.xenium", + "pipeline_info", + "pipeline_info/nf_core_spatialaxe_software_mqc_versions.yml" + ], + [ + ".stub:md5,d41d8cd98f00b204e9800998ecf8427e", + ".stub:md5,d41d8cd98f00b204e9800998ecf8427e", + "multiqc_report.html:md5,d41d8cd98f00b204e9800998ecf8427e", + ".stub:md5,d41d8cd98f00b204e9800998ecf8427e", + ".stub:md5,d41d8cd98f00b204e9800998ecf8427e", + "multiqc_report.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "cell-polygons.geojson:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcript-metadata.csv:md5,d41d8cd98f00b204e9800998ecf8427e", + "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + ".end-of-run:md5,d41d8cd98f00b204e9800998ecf8427e", + "analysis.tar.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "analysis.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + "analysis_summary.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "aux_outputs.tar.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "cell_boundaries.csv.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "cell_boundaries.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "cell_feature_matrix.h5:md5,d41d8cd98f00b204e9800998ecf8427e", + "cell_feature_matrix.tar.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "cell_feature_matrix.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + "cells.csv.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "cells.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "cells.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + "experiment.xenium:md5,d41d8cd98f00b204e9800998ecf8427e", + "gene_panel.json:md5,d41d8cd98f00b204e9800998ecf8427e", + "metrics_summary.csv:md5,d41d8cd98f00b204e9800998ecf8427e", + "morphology.ome.tif:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"morphology_focus_0000.ome.tif:md5,d41d8cd98f00b204e9800998ecf8427e", + "nucleus_boundaries.csv.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "nucleus_boundaries.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcripts.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcripts.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + "experiment.xenium:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "timestamp": "2026-05-18T21:55:37.544217665", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + } +} \ No newline at end of file diff --git a/tests/image_mode.nf.test b/tests/image_mode.nf.test new file mode 100644 index 00000000..4a594afa --- /dev/null +++ b/tests/image_mode.nf.test @@ -0,0 +1,36 @@ +nextflow_pipeline { + + name "Test pipeline for the `image` mode, test run the cellpose->baysor subworkflow" + script "../main.nf" + tag "pipeline" + config "../conf/test_image_mode.config" + + test("-profile test stub") { + + options "-stub" + + when { + params { + outdir = "$outputDir" + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success}, + { assert snapshot( + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_spatialaxe_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } +} diff --git a/tests/image_mode.nf.test.snap b/tests/image_mode.nf.test.snap new file mode 100644 index 00000000..a9b73c2c --- /dev/null +++ 
b/tests/image_mode.nf.test.snap @@ -0,0 +1,167 @@ +{ + "-profile test stub": { + "content": [ + { + "BAYSOR_PREPROCESS_TRANSCRIPTS": { + "python": "3.14.4" + }, + "BAYSOR_RUN": { + "baysor": "0.7.1" + }, + "CELLPOSE_CELLS": { + "torch": "2.10.0+cu128" + }, + "RESIZE_TIF": { + "tifffile": "2026.2.24" + }, + "SPATIALDATA_MERGE_RAW_REDEFINED": { + "spatialdata": "0.7.2" + }, + "SPATIALDATA_META": { + "spatialdata": "0.7.2" + }, + "SPATIALDATA_WRITE_RAW_BUNDLE": { + "spatialdata": "0.7.2" + }, + "SPATIALDATA_WRITE_REDEFINED_BUNDLE": { + "spatialdata": "0.7.2" + }, + "UNTAR": { + "untar": 1.34 + }, + "Workflow": { + "nf-core/spatialaxe": "v1.1.0dev" + }, + "XENIUMRANGER_IMPORT_SEGMENTATION": { + "xeniumranger": "4.0.1.1" + } + }, + [ + "coordinate", + "coordinate/xeniumranger", + "coordinate/xeniumranger/import_segementation", + "coordinate/xeniumranger/import_segementation/test_run", + "coordinate/xeniumranger/import_segementation/test_run/experiment.xenium", + "image", + "image/baysor", + "image/baysor/preprocess", + "image/baysor/preprocess/test_run", + "image/baysor/preprocess/test_run/filtered_transcripts.csv", + "image/baysor/run", + "image/baysor/run/test_run", + "image/baysor/run/test_run/segmentation.csv", + "image/baysor/run/test_run/segmentation_polygons_2d.json", + "image/cellpose_cells", + "image/cellpose_cells/test_run", + "image/cellpose_cells/test_run/morphology_focus_0000.ome_cp_masks.tif", + "image/multiqc", + "image/multiqc/raw_bundle", + "image/multiqc/raw_bundle/multiqc_data", + "image/multiqc/raw_bundle/multiqc_data/.stub", + "image/multiqc/raw_bundle/multiqc_plots", + "image/multiqc/raw_bundle/multiqc_plots/.stub", + "image/multiqc/raw_bundle/multiqc_report.html", + "image/multiqc/redefined_bundle", + "image/multiqc/redefined_bundle/multiqc_data", + "image/multiqc/redefined_bundle/multiqc_data/.stub", + "image/multiqc/redefined_bundle/multiqc_plots", + "image/multiqc/redefined_bundle/multiqc_plots/.stub", + 
"image/multiqc/redefined_bundle/multiqc_report.html", + "image/spatialdata", + "image/spatialdata/merge", + "image/spatialdata/merge/spatialdata", + "image/spatialdata/merge/spatialdata/test_run", + "image/spatialdata/merge/spatialdata/test_run/merged_bundle", + "image/spatialdata/merge/spatialdata/test_run/merged_bundle/fake_file.txt", + "image/spatialdata/meta", + "image/spatialdata/meta/spatialdata", + "image/spatialdata/meta/spatialdata/test_run", + "image/spatialdata/meta/spatialdata/test_run/metadata", + "image/spatialdata/meta/spatialdata/test_run/metadata/fake_file.txt", + "image/spatialdata/write", + "image/spatialdata/write/spatialdata", + "image/spatialdata/write/spatialdata/test_run", + "image/spatialdata/write/spatialdata/test_run/raw_bundle", + "image/spatialdata/write/spatialdata/test_run/raw_bundle/fake_file.txt", + "image/spatialdata/write/spatialdata/test_run/redefined_bundle", + "image/spatialdata/write/spatialdata/test_run/redefined_bundle/fake_file.txt", + "image/untar", + "image/untar/test_run", + "image/untar/test_run/.end-of-run", + "image/untar/test_run/analysis.tar.gz", + "image/untar/test_run/analysis.zarr.zip", + "image/untar/test_run/analysis_summary.html", + "image/untar/test_run/aux_outputs.tar.gz", + "image/untar/test_run/cell_boundaries.csv.gz", + "image/untar/test_run/cell_boundaries.parquet", + "image/untar/test_run/cell_feature_matrix.h5", + "image/untar/test_run/cell_feature_matrix.tar.gz", + "image/untar/test_run/cell_feature_matrix.zarr.zip", + "image/untar/test_run/cells.csv.gz", + "image/untar/test_run/cells.parquet", + "image/untar/test_run/cells.zarr.zip", + "image/untar/test_run/experiment.xenium", + "image/untar/test_run/gene_panel.json", + "image/untar/test_run/metrics_summary.csv", + "image/untar/test_run/morphology.ome.tif", + "image/untar/test_run/morphology_focus", + "image/untar/test_run/morphology_focus/morphology_focus_0000.ome.tif", + "image/untar/test_run/nucleus_boundaries.csv.gz", + 
"image/untar/test_run/nucleus_boundaries.parquet", + "image/untar/test_run/transcripts.parquet", + "image/untar/test_run/transcripts.zarr.zip", + "image/utility", + "image/utility/resize_tif", + "image/utility/resize_tif/test_run", + "image/utility/resize_tif/test_run/resized_morphology_focus_0000.ome_cp_masks.tif.tif", + "pipeline_info", + "pipeline_info/nf_core_spatialaxe_software_mqc_versions.yml" + ], + [ + "experiment.xenium:md5,d41d8cd98f00b204e9800998ecf8427e", + "filtered_transcripts.csv:md5,d41d8cd98f00b204e9800998ecf8427e", + "segmentation.csv:md5,d41d8cd98f00b204e9800998ecf8427e", + "segmentation_polygons_2d.json:md5,d41d8cd98f00b204e9800998ecf8427e", + "morphology_focus_0000.ome_cp_masks.tif:md5,d41d8cd98f00b204e9800998ecf8427e", + ".stub:md5,d41d8cd98f00b204e9800998ecf8427e", + ".stub:md5,d41d8cd98f00b204e9800998ecf8427e", + "multiqc_report.html:md5,d41d8cd98f00b204e9800998ecf8427e", + ".stub:md5,d41d8cd98f00b204e9800998ecf8427e", + ".stub:md5,d41d8cd98f00b204e9800998ecf8427e", + "multiqc_report.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + ".end-of-run:md5,d41d8cd98f00b204e9800998ecf8427e", + "analysis.tar.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "analysis.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + "analysis_summary.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "aux_outputs.tar.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "cell_boundaries.csv.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "cell_boundaries.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "cell_feature_matrix.h5:md5,d41d8cd98f00b204e9800998ecf8427e", + "cell_feature_matrix.tar.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "cell_feature_matrix.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + "cells.csv.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"cells.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "cells.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + "experiment.xenium:md5,d41d8cd98f00b204e9800998ecf8427e", + "gene_panel.json:md5,d41d8cd98f00b204e9800998ecf8427e", + "metrics_summary.csv:md5,d41d8cd98f00b204e9800998ecf8427e", + "morphology.ome.tif:md5,d41d8cd98f00b204e9800998ecf8427e", + "morphology_focus_0000.ome.tif:md5,d41d8cd98f00b204e9800998ecf8427e", + "nucleus_boundaries.csv.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "nucleus_boundaries.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcripts.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcripts.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + "resized_morphology_focus_0000.ome_cp_masks.tif.tif:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "timestamp": "2026-05-18T22:00:39.030306506", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + } +} \ No newline at end of file diff --git a/tests/nextflow.config b/tests/nextflow.config index caf25a7b..2c1bfd32 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -4,11 +4,10 @@ ======================================================================================== */ -// TODO nf-core: Specify any additional parameters here // Or any resources requirements params { modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' - pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/spatialxe/' + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/spatialaxe' } aws.client.anonymous = true // fixes S3 access issues on self-hosted runners diff --git a/tests/preview_mode.nf.test b/tests/preview_mode.nf.test new file mode 100644 index 00000000..5a423490 --- /dev/null +++ b/tests/preview_mode.nf.test @@ -0,0 +1,36 @@ +nextflow_pipeline { + + name "Test pipeline for the `preview` mode, test run the basyor-preview subworkflow" + script 
"../main.nf" + tag "pipeline" + config "../conf/test_preview_mode.config" + + test("-profile test stub") { + + options "-stub" + + when { + params { + outdir = "$outputDir" + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success}, + { assert snapshot( + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_spatialaxe_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } +} diff --git a/tests/preview_mode.nf.test.snap b/tests/preview_mode.nf.test.snap new file mode 100644 index 00000000..3d364f04 --- /dev/null +++ b/tests/preview_mode.nf.test.snap @@ -0,0 +1,120 @@ +{ + "-profile test stub": { + "content": [ + { + "BAYSOR_CREATE_DATASET": { + "baysor": "0.7.1" + }, + "BAYSOR_PREVIEW": { + "baysor": "0.7.1" + }, + "EXTRACT_PREVIEW_DATA": { + "python": "3.14.4" + }, + "PARQUET_TO_CSV": { + "pyarrow": "24.0.0" + }, + "UNTAR": { + "untar": 1.34 + }, + "Workflow": { + "nf-core/spatialaxe": "v1.1.0dev" + } + }, + [ + "pipeline_info", + "pipeline_info/nf_core_spatialaxe_software_mqc_versions.yml", + "preview", + "preview/baysor", + "preview/baysor/create_dataset", + "preview/baysor/create_dataset/test_run", + "preview/baysor/create_dataset/test_run/sampled_transcripts.csv", + "preview/baysor/preview", + "preview/baysor/preview/test_run", + "preview/baysor/preview/test_run/preview.html", + "preview/multiqc", + "preview/multiqc/multiqc_data", + 
"preview/multiqc/multiqc_data/.stub", + "preview/multiqc/multiqc_plots", + "preview/multiqc/multiqc_plots/.stub", + "preview/multiqc/multiqc_report.html", + "preview/untar", + "preview/untar/test_run", + "preview/untar/test_run/.end-of-run", + "preview/untar/test_run/analysis.tar.gz", + "preview/untar/test_run/analysis.zarr.zip", + "preview/untar/test_run/analysis_summary.html", + "preview/untar/test_run/aux_outputs.tar.gz", + "preview/untar/test_run/cell_boundaries.csv.gz", + "preview/untar/test_run/cell_boundaries.parquet", + "preview/untar/test_run/cell_feature_matrix.h5", + "preview/untar/test_run/cell_feature_matrix.tar.gz", + "preview/untar/test_run/cell_feature_matrix.zarr.zip", + "preview/untar/test_run/cells.csv.gz", + "preview/untar/test_run/cells.parquet", + "preview/untar/test_run/cells.zarr.zip", + "preview/untar/test_run/experiment.xenium", + "preview/untar/test_run/gene_panel.json", + "preview/untar/test_run/metrics_summary.csv", + "preview/untar/test_run/morphology.ome.tif", + "preview/untar/test_run/morphology_focus", + "preview/untar/test_run/morphology_focus/morphology_focus_0000.ome.tif", + "preview/untar/test_run/nucleus_boundaries.csv.gz", + "preview/untar/test_run/nucleus_boundaries.parquet", + "preview/untar/test_run/transcripts.parquet", + "preview/untar/test_run/transcripts.zarr.zip", + "preview/utility", + "preview/utility/parquet_to_csv", + "preview/utility/parquet_to_csv/test_run", + "preview/utility/parquet_to_csv/test_run/transcripts.parquet.csv", + "preview/utility/preview_data", + "preview/utility/preview_data/test_run", + "preview/utility/preview_data/test_run/gene_structure_mqc.tsv", + "preview/utility/preview_data/test_run/noise_distribution_mqc.tsv", + "preview/utility/preview_data/test_run/noise_level_mqc.png", + "preview/utility/preview_data/test_run/transcript_plots_mqc.png", + "preview/utility/preview_data/test_run/umap_mqc.tsv" + ], + [ + "sampled_transcripts.csv:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"preview.html:md5,d41d8cd98f00b204e9800998ecf8427e", + ".stub:md5,d41d8cd98f00b204e9800998ecf8427e", + ".stub:md5,d41d8cd98f00b204e9800998ecf8427e", + "multiqc_report.html:md5,d41d8cd98f00b204e9800998ecf8427e", + ".end-of-run:md5,d41d8cd98f00b204e9800998ecf8427e", + "analysis.tar.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "analysis.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + "analysis_summary.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "aux_outputs.tar.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "cell_boundaries.csv.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "cell_boundaries.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "cell_feature_matrix.h5:md5,d41d8cd98f00b204e9800998ecf8427e", + "cell_feature_matrix.tar.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "cell_feature_matrix.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + "cells.csv.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "cells.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "cells.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + "experiment.xenium:md5,d41d8cd98f00b204e9800998ecf8427e", + "gene_panel.json:md5,d41d8cd98f00b204e9800998ecf8427e", + "metrics_summary.csv:md5,d41d8cd98f00b204e9800998ecf8427e", + "morphology.ome.tif:md5,d41d8cd98f00b204e9800998ecf8427e", + "morphology_focus_0000.ome.tif:md5,d41d8cd98f00b204e9800998ecf8427e", + "nucleus_boundaries.csv.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "nucleus_boundaries.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcripts.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcripts.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcripts.parquet.csv:md5,d41d8cd98f00b204e9800998ecf8427e", + "gene_structure_mqc.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "noise_distribution_mqc.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "noise_level_mqc.png:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcript_plots_mqc.png:md5,d41d8cd98f00b204e9800998ecf8427e", + "umap_mqc.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "timestamp": 
"2026-06-17T11:15:30.764424637", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.2" + } + } +} \ No newline at end of file diff --git a/tests/segfree_mode.nf.test b/tests/segfree_mode.nf.test new file mode 100644 index 00000000..854b17d5 --- /dev/null +++ b/tests/segfree_mode.nf.test @@ -0,0 +1,36 @@ +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + tag "pipeline" + config "../conf/test_segfree_mode.config" + + test("-profile test stub") { + + options "-stub" + + when { + params { + outdir = "$outputDir" + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success}, + { assert snapshot( + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_spatialaxe_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } +} diff --git a/tests/segfree_mode.nf.test.snap b/tests/segfree_mode.nf.test.snap new file mode 100644 index 00000000..ac663a43 --- /dev/null +++ b/tests/segfree_mode.nf.test.snap @@ -0,0 +1,97 @@ +{ + "-profile test stub": { + "content": [ + { + "BAYSOR_PREPROCESS_TRANSCRIPTS": { + "python": "3.14.4" + }, + "BAYSOR_SEGFREE": { + "baysor": "0.7.1" + }, + "UNTAR": { + "untar": 1.34 + }, + "Workflow": { + "nf-core/spatialaxe": "v1.1.0dev" + } + }, + [ + "pipeline_info", + "pipeline_info/nf_core_spatialaxe_software_mqc_versions.yml", + "segfree", + "segfree/baysor", + "segfree/baysor/preprocess", + 
"segfree/baysor/preprocess/test_run", + "segfree/baysor/preprocess/test_run/filtered_transcripts.csv", + "segfree/baysor/segfree", + "segfree/baysor/segfree/test_run", + "segfree/baysor/segfree/test_run/ncvs.loom", + "segfree/multiqc", + "segfree/multiqc/multiqc_data", + "segfree/multiqc/multiqc_data/.stub", + "segfree/multiqc/multiqc_plots", + "segfree/multiqc/multiqc_plots/.stub", + "segfree/multiqc/multiqc_report.html", + "segfree/untar", + "segfree/untar/test_run", + "segfree/untar/test_run/.end-of-run", + "segfree/untar/test_run/analysis.tar.gz", + "segfree/untar/test_run/analysis.zarr.zip", + "segfree/untar/test_run/analysis_summary.html", + "segfree/untar/test_run/aux_outputs.tar.gz", + "segfree/untar/test_run/cell_boundaries.csv.gz", + "segfree/untar/test_run/cell_boundaries.parquet", + "segfree/untar/test_run/cell_feature_matrix.h5", + "segfree/untar/test_run/cell_feature_matrix.tar.gz", + "segfree/untar/test_run/cell_feature_matrix.zarr.zip", + "segfree/untar/test_run/cells.csv.gz", + "segfree/untar/test_run/cells.parquet", + "segfree/untar/test_run/cells.zarr.zip", + "segfree/untar/test_run/experiment.xenium", + "segfree/untar/test_run/gene_panel.json", + "segfree/untar/test_run/metrics_summary.csv", + "segfree/untar/test_run/morphology.ome.tif", + "segfree/untar/test_run/morphology_focus", + "segfree/untar/test_run/morphology_focus/morphology_focus_0000.ome.tif", + "segfree/untar/test_run/nucleus_boundaries.csv.gz", + "segfree/untar/test_run/nucleus_boundaries.parquet", + "segfree/untar/test_run/transcripts.parquet", + "segfree/untar/test_run/transcripts.zarr.zip" + ], + [ + "filtered_transcripts.csv:md5,d41d8cd98f00b204e9800998ecf8427e", + "ncvs.loom:md5,d41d8cd98f00b204e9800998ecf8427e", + ".stub:md5,d41d8cd98f00b204e9800998ecf8427e", + ".stub:md5,d41d8cd98f00b204e9800998ecf8427e", + "multiqc_report.html:md5,d41d8cd98f00b204e9800998ecf8427e", + ".end-of-run:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"analysis.tar.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "analysis.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + "analysis_summary.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "aux_outputs.tar.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "cell_boundaries.csv.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "cell_boundaries.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "cell_feature_matrix.h5:md5,d41d8cd98f00b204e9800998ecf8427e", + "cell_feature_matrix.tar.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "cell_feature_matrix.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + "cells.csv.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "cells.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "cells.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + "experiment.xenium:md5,d41d8cd98f00b204e9800998ecf8427e", + "gene_panel.json:md5,d41d8cd98f00b204e9800998ecf8427e", + "metrics_summary.csv:md5,d41d8cd98f00b204e9800998ecf8427e", + "morphology.ome.tif:md5,d41d8cd98f00b204e9800998ecf8427e", + "morphology_focus_0000.ome.tif:md5,d41d8cd98f00b204e9800998ecf8427e", + "nucleus_boundaries.csv.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "nucleus_boundaries.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcripts.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcripts.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "timestamp": "2026-05-18T22:02:39.947804998", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + } +} \ No newline at end of file diff --git a/tests/test_xenium_patch/__init__.py b/tests/test_xenium_patch/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_xenium_patch/test_divide_transcripts.py b/tests/test_xenium_patch/test_divide_transcripts.py new file mode 100644 index 00000000..859fd5ae --- /dev/null +++ b/tests/test_xenium_patch/test_divide_transcripts.py @@ -0,0 +1,720 @@ +"""Tests for divide_transcripts.py — grid computation + transcript division.""" + +import importlib.util +import json +import math +import 
sys +from pathlib import Path + +import numpy as np +import pyarrow as pa +import pyarrow.parquet as pq +import pytest + +# --------------------------------------------------------------------------- +# Import the standalone script from bin/ +# --------------------------------------------------------------------------- + +_BIN_DIR = Path(__file__).resolve().parents[2] / "bin" +_SCRIPT = _BIN_DIR / "divide_transcripts.py" +_spec = importlib.util.spec_from_file_location("divide_transcripts", _SCRIPT) +_mod = importlib.util.module_from_spec(_spec) +sys.modules["divide_transcripts"] = _mod +_spec.loader.exec_module(_mod) + +from divide_transcripts import ( # noqa: E402 + Bounds, + PatchInfo, + _compute_uniform_grid, + _count_transcripts_per_tile, + _find_adjacent_patches, + compute_density_quadtree_grid, + compute_tilewidth_uniform_grid, + divide_transcripts, + merge_sparse_tiles, + save_grid_metadata, +) + +PIXEL_SIZE = 0.2125 + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def synthetic_transcripts(tmp_path: Path) -> Path: + """Write a synthetic transcripts.parquet with 1000 rows, uniform spatial distribution.""" + rng = np.random.default_rng(42) + n = 1000 + table = pa.table( + { + "transcript_id": pa.array([f"tx_{i}" for i in range(n)], type=pa.string()), + "cell_id": pa.array(["UNASSIGNED"] * n, type=pa.string()), + "overlaps_nucleus": pa.array([0] * n, type=pa.int32()), + "feature_name": pa.array( + [f"gene_{i % 50}" for i in range(n)], type=pa.string() + ), + "x_location": pa.array(rng.uniform(0.0, 1275.0, n), type=pa.float32()), + "y_location": pa.array(rng.uniform(0.0, 1275.0, n), type=pa.float32()), + "z_location": pa.array(rng.uniform(0.0, 10.0, n), type=pa.float32()), + "qv": pa.array(rng.uniform(20.0, 40.0, n), type=pa.float32()), + } + ) + path = tmp_path / "transcripts.parquet" + pq.write_table(table, 
str(path)) + return path + + +@pytest.fixture +def dense_corner_transcripts(tmp_path: Path) -> Path: + """90% of transcripts in the top-left corner, 10% uniform.""" + rng = np.random.default_rng(99) + n_dense = 900 + n_sparse = 100 + n = n_dense + n_sparse + + x_dense = rng.uniform(0.0, 50.0, n_dense) + y_dense = rng.uniform(0.0, 50.0, n_dense) + x_sparse = rng.uniform(0.0, 1275.0, n_sparse) + y_sparse = rng.uniform(0.0, 1275.0, n_sparse) + + table = pa.table( + { + "transcript_id": pa.array([f"tx_{i}" for i in range(n)], type=pa.string()), + "cell_id": pa.array(["UNASSIGNED"] * n, type=pa.string()), + "overlaps_nucleus": pa.array([0] * n, type=pa.int32()), + "feature_name": pa.array( + [f"gene_{i % 50}" for i in range(n)], type=pa.string() + ), + "x_location": pa.array( + np.concatenate([x_dense, x_sparse]).astype(np.float32), + type=pa.float32(), + ), + "y_location": pa.array( + np.concatenate([y_dense, y_sparse]).astype(np.float32), + type=pa.float32(), + ), + "z_location": pa.array( + rng.uniform(0.0, 10.0, n).astype(np.float32), type=pa.float32() + ), + "qv": pa.array( + rng.uniform(20.0, 40.0, n).astype(np.float32), type=pa.float32() + ), + } + ) + path = tmp_path / "transcripts_dense.parquet" + pq.write_table(table, str(path)) + return path + + +# --------------------------------------------------------------------------- +# Uniform grid tests +# --------------------------------------------------------------------------- + + +class TestUniformGridBasic: + def test_uniform_grid_basic(self): + """3x3 grid from 2000um tile width on a 6000x6000um image.""" + # Use exact pixel count: 6000um / 0.2125um/px = 28235.29... 
-> round to 28235 + # image_um = 28235 * 0.2125 = 5999.9375; ceil(5999.9375 / 2000) = 3 + image_px = int(6000.0 / PIXEL_SIZE) + extent = Bounds(0.0, 6000.0, 0.0, 6000.0) + + patches, rows, cols, overlap_px = compute_tilewidth_uniform_grid( + image_height_px=image_px, + image_width_px=image_px, + tile_width_um=2000.0, + overlap_um=50.0, + pixel_size_um=PIXEL_SIZE, + transcript_extent_um=extent, + ) + + assert rows == 3 + assert cols == 3 + assert len(patches) == 9 + + def test_uniform_grid_single_tile(self): + """Tile width larger than image produces a 1x1 grid.""" + image_px = 1000 + extent = Bounds(0.0, image_px * PIXEL_SIZE, 0.0, image_px * PIXEL_SIZE) + + patches, rows, cols, _ = compute_tilewidth_uniform_grid( + image_height_px=image_px, + image_width_px=image_px, + tile_width_um=50000.0, + overlap_um=50.0, + pixel_size_um=PIXEL_SIZE, + transcript_extent_um=extent, + ) + + assert rows == 1 + assert cols == 1 + assert len(patches) == 1 + + def test_uniform_grid_overlap(self): + """Global bounds extend beyond core by the overlap amount.""" + image_px = 1000 + overlap_um = 50.0 + + patches = _compute_uniform_grid( + image_height_px=image_px, + image_width_px=image_px, + grid_rows=2, + grid_cols=2, + overlap_px=int(math.ceil(overlap_um / PIXEL_SIZE)), + pixel_size_um=PIXEL_SIZE, + ) + + # Interior patch boundary: global should extend beyond core + for p in patches: + assert p.global_bounds_px.x_min <= p.core_bounds_px.x_min + assert p.global_bounds_px.x_max >= p.core_bounds_px.x_max + assert p.global_bounds_px.y_min <= p.core_bounds_px.y_min + assert p.global_bounds_px.y_max >= p.core_bounds_px.y_max + + +# --------------------------------------------------------------------------- +# Quadtree grid tests +# --------------------------------------------------------------------------- + + +class TestQuadtreeGrid: + def test_quadtree_uniform_density(self): + """When density is uniform with high threshold, quadtree should not subdivide.""" + rng = 
np.random.default_rng(42) + image_px = 1000 + image_um = image_px * PIXEL_SIZE + + x = rng.uniform(0, image_um, 1000) + y = rng.uniform(0, image_um, 1000) + + patches, _, _, _ = compute_density_quadtree_grid( + image_height_px=image_px, + image_width_px=image_px, + tile_width_um=100.0, + overlap_um=10.0, + pixel_size_um=PIXEL_SIZE, + x_coords_um=x, + y_coords_um=y, + max_transcripts_per_patch=10000, + ) + + # ceil(212.5/100)=3 -> 3x3 = 9 initial patches, no subdivision + assert len(patches) == 9 + + def test_quadtree_dense_region(self): + """Put ~98% of transcripts (5000 of 5100) in one corner, verify subdivision produces more patches.""" + rng = np.random.default_rng(42) + image_px = 1000 + image_um = image_px * PIXEL_SIZE + + x_sparse = rng.uniform(0, image_um, 100) + y_sparse = rng.uniform(0, image_um, 100) + x_dense = rng.uniform(0, image_um * 0.2, 5000) + y_dense = rng.uniform(0, image_um * 0.2, 5000) + x = np.concatenate([x_sparse, x_dense]) + y = np.concatenate([y_sparse, y_dense]) + + patches, _, _, _ = compute_density_quadtree_grid( + image_height_px=image_px, + image_width_px=image_px, + tile_width_um=100.0, + overlap_um=10.0, + pixel_size_um=PIXEL_SIZE, + x_coords_um=x, + y_coords_um=y, + max_transcripts_per_patch=500, + min_tile_width_um=10.0, + max_depth=4, + ) + + # Should have subdivided beyond the initial 9 + assert len(patches) > 9 + + def test_quadtree_max_depth(self): + """Verify subdivision stops at max_depth: deeper depth -> more patches.""" + rng = np.random.default_rng(42) + image_px = 1000 + image_um = image_px * PIXEL_SIZE + + x = rng.normal(image_um / 2, 5.0, 10000) + y = rng.normal(image_um / 2, 5.0, 10000) + + patches_d1, _, _, _ = compute_density_quadtree_grid( + image_height_px=image_px, + image_width_px=image_px, + tile_width_um=100.0, + overlap_um=10.0, + pixel_size_um=PIXEL_SIZE, + x_coords_um=x, + y_coords_um=y, + max_transcripts_per_patch=10, + min_tile_width_um=1.0, + max_depth=1, + ) + + patches_d4, _, _, _ = compute_density_quadtree_grid( + 
image_height_px=image_px, + image_width_px=image_px, + tile_width_um=100.0, + overlap_um=10.0, + pixel_size_um=PIXEL_SIZE, + x_coords_um=x, + y_coords_um=y, + max_transcripts_per_patch=10, + min_tile_width_um=1.0, + max_depth=4, + ) + + assert len(patches_d4) > len(patches_d1) + + def test_quadtree_min_tile_width(self): + """Verify subdivision stops at min_tile_width: all cores >= min width.""" + rng = np.random.default_rng(42) + image_px = 1000 + image_um = image_px * PIXEL_SIZE + min_tile_um = 30.0 + + x = rng.normal(image_um / 2, 5.0, 10000) + y = rng.normal(image_um / 2, 5.0, 10000) + + patches, _, _, _ = compute_density_quadtree_grid( + image_height_px=image_px, + image_width_px=image_px, + tile_width_um=100.0, + overlap_um=10.0, + pixel_size_um=PIXEL_SIZE, + x_coords_um=x, + y_coords_um=y, + max_transcripts_per_patch=10, + min_tile_width_um=min_tile_um, + max_depth=10, + ) + + for p in patches: + # Allow 1um rounding tolerance from pixel conversion + assert p.core_bounds_um.width >= min_tile_um - 1.0, ( + f"Patch {p.patch_id} width {p.core_bounds_um.width:.1f} < min {min_tile_um}" + ) + assert p.core_bounds_um.height >= min_tile_um - 1.0, ( + f"Patch {p.patch_id} height {p.core_bounds_um.height:.1f} < min {min_tile_um}" + ) + + +# --------------------------------------------------------------------------- +# Division tests +# --------------------------------------------------------------------------- + + +class TestDivideTranscripts: + def test_divide_transcripts_basic( + self, synthetic_transcripts: Path, tmp_path: Path + ): + """Divide synthetic parquet, verify per-patch files are written.""" + output_dir = tmp_path / "output" + + divide_transcripts( + transcripts_path=synthetic_transcripts, + output_dir=output_dir, + image_width_px=6000, + image_height_px=6000, + tile_width_um=1000.0, + overlap_um=50.0, + balanced=False, + pixel_size_um=PIXEL_SIZE, + max_workers=1, + ) + + # Grid metadata should exist + grid_json = output_dir / "patch_grid.json" + assert 
grid_json.exists() + + with open(grid_json) as f: + metadata = json.load(f) + + # Each patch should have a transcripts.parquet file + for patch in metadata["patches"]: + patch_parquet = output_dir / patch["patch_id"] / "transcripts.parquet" + assert patch_parquet.exists(), f"Missing parquet for {patch['patch_id']}" + + def test_divide_transcripts_coordinates_offset( + self, synthetic_transcripts: Path, tmp_path: Path + ): + """Verify coordinates are offset to patch-local space.""" + output_dir = tmp_path / "output" + + divide_transcripts( + transcripts_path=synthetic_transcripts, + output_dir=output_dir, + image_width_px=6000, + image_height_px=6000, + tile_width_um=1000.0, + overlap_um=50.0, + balanced=False, + pixel_size_um=PIXEL_SIZE, + max_workers=1, + ) + + with open(output_dir / "patch_grid.json") as f: + metadata = json.load(f) + + for patch_meta in metadata["patches"]: + patch_parquet = output_dir / patch_meta["patch_id"] / "transcripts.parquet" + if not patch_parquet.exists(): + continue + tbl = pq.read_table(str(patch_parquet)) + if tbl.num_rows == 0: + continue + + gb = patch_meta["global_bounds_um"] + patch_width = gb["x_max"] - gb["x_min"] + patch_height = gb["y_max"] - gb["y_min"] + + x_arr = tbl.column("x_location").to_numpy() + y_arr = tbl.column("y_location").to_numpy() + + # Local coords should be in [0, patch_width) x [0, patch_height) + assert float(np.min(x_arr)) >= -0.01, ( + f"Patch {patch_meta['patch_id']}: x_min={np.min(x_arr)} < 0" + ) + assert float(np.max(x_arr)) < patch_width + 0.01, ( + f"Patch {patch_meta['patch_id']}: x_max={np.max(x_arr)} >= {patch_width}" + ) + assert float(np.min(y_arr)) >= -0.01 + assert float(np.max(y_arr)) < patch_height + 0.01 + + def test_divide_transcripts_no_transcript_loss( + self, synthetic_transcripts: Path, tmp_path: Path + ): + """Verify all transcripts appear in at least one patch.""" + output_dir = tmp_path / "output" + + original = pq.read_table(str(synthetic_transcripts)) + original_ids = 
set(original.column("transcript_id").to_pylist()) + + divide_transcripts( + transcripts_path=synthetic_transcripts, + output_dir=output_dir, + image_width_px=6000, + image_height_px=6000, + tile_width_um=1000.0, + overlap_um=50.0, + balanced=False, + pixel_size_um=PIXEL_SIZE, + max_workers=1, + ) + + with open(output_dir / "patch_grid.json") as f: + metadata = json.load(f) + + found_ids: set[str] = set() + for patch_meta in metadata["patches"]: + patch_parquet = output_dir / patch_meta["patch_id"] / "transcripts.parquet" + if patch_parquet.exists(): + tbl = pq.read_table(str(patch_parquet)) + found_ids.update(tbl.column("transcript_id").to_pylist()) + + # Every original transcript must appear in at least one patch + missing = original_ids - found_ids + assert len(missing) == 0, f"{len(missing)} transcripts lost during division" + + +# --------------------------------------------------------------------------- +# Grid metadata JSON roundtrip +# --------------------------------------------------------------------------- + + +class TestGridMetadataJSON: + def test_grid_metadata_json_roundtrip(self, tmp_path: Path): + """Save + load grid metadata preserves all fields.""" + image_px = 1000 + extent = Bounds(0.0, image_px * PIXEL_SIZE, 0.0, image_px * PIXEL_SIZE) + patches, rows, cols, overlap_px = compute_tilewidth_uniform_grid( + image_height_px=image_px, + image_width_px=image_px, + tile_width_um=100.0, + overlap_um=50.0, + pixel_size_um=PIXEL_SIZE, + transcript_extent_um=extent, + ) + + path = tmp_path / "patch_grid.json" + save_grid_metadata( + patches=patches, + image_height_px=image_px, + image_width_px=image_px, + pixel_size_um=PIXEL_SIZE, + transcript_extent_um=extent, + grid_rows=rows, + grid_cols=cols, + overlap_um=50.0, + overlap_px=overlap_px, + grid_type="uniform", + output_path=path, + ) + + with open(path) as f: + data = json.load(f) + + assert data["version"] == "1.0" + assert data["grid_rows"] == rows + assert data["grid_cols"] == cols + assert 
data["overlap_um"] == 50.0 + assert data["overlap_px"] == overlap_px + assert data["grid_type"] == "uniform" + assert len(data["patches"]) == len(patches) + + for orig, loaded in zip(patches, data["patches"]): + assert loaded["patch_id"] == orig.patch_id + assert loaded["row"] == orig.row + assert loaded["col"] == orig.col + assert loaded["global_bounds_px"]["x_min"] == pytest.approx( + orig.global_bounds_px.x_min + ) + assert loaded["core_bounds_um"]["y_max"] == pytest.approx( + orig.core_bounds_um.y_max + ) + + +# --------------------------------------------------------------------------- +# Merge sparse tiles tests +# --------------------------------------------------------------------------- + + +def _make_2x2_grid(pixel_size: float = PIXEL_SIZE) -> tuple[list[PatchInfo], int, int]: + """Build a 2x2 uniform grid on a 1000x1000 pixel image. + + Returns: + Tuple of (patches, image_px, overlap_px). + """ + image_px = 1000 + overlap_px = int(math.ceil(50.0 / pixel_size)) + patches = _compute_uniform_grid( + image_height_px=image_px, + image_width_px=image_px, + grid_rows=2, + grid_cols=2, + overlap_px=overlap_px, + pixel_size_um=pixel_size, + ) + return patches, image_px, overlap_px + + +class TestMergeSparseTiles: + def test_no_merge_above_threshold(self): + """All tiles above threshold -- no merging happens.""" + patches, image_px, overlap_px = _make_2x2_grid() + image_um = image_px * PIXEL_SIZE + + rng = np.random.default_rng(42) + n = 4000 + x = rng.uniform(0, image_um, n).astype(np.float64) + y = rng.uniform(0, image_um, n).astype(np.float64) + + merged, merge_count = merge_sparse_tiles( + patches=patches, + x_coords_um=x, + y_coords_um=y, + overlap_px=overlap_px, + pixel_size_um=PIXEL_SIZE, + image_width_px=image_px, + image_height_px=image_px, + min_transcripts=100, + ) + + assert merge_count == 0 + assert len(merged) == len(patches) + merged_ids = {p.patch_id for p in merged} + original_ids = {p.patch_id for p in patches} + assert merged_ids == 
original_ids + + def test_merge_sparse_edge_tile(self): + """One corner tile has very few transcripts -- it gets merged into a neighbor.""" + patches, image_px, overlap_px = _make_2x2_grid() + + # Put 500 transcripts in each of 3 tiles, 5 in the first tile (patches[0]) + rng = np.random.default_rng(7) + # Find the core bounds of each patch to place transcripts correctly + patch_map = {p.patch_id: p for p in patches} + + sparse_id = patches[0].patch_id # first tile gets very few transcripts + xs, ys = [], [] + for p in patches: + cb = p.core_bounds_um + n = 5 if p.patch_id == sparse_id else 500 + xs.append(rng.uniform(cb.x_min + 0.1, cb.x_max - 0.1, n)) + ys.append(rng.uniform(cb.y_min + 0.1, cb.y_max - 0.1, n)) + + x = np.concatenate(xs) + y = np.concatenate(ys) + + merged, merge_count = merge_sparse_tiles( + patches=patches, + x_coords_um=x, + y_coords_um=y, + overlap_px=overlap_px, + pixel_size_um=PIXEL_SIZE, + image_width_px=image_px, + image_height_px=image_px, + min_transcripts=100, + ) + + assert merge_count == 1 + assert len(merged) == 3 + + # The sparse tile should no longer exist as a patch + merged_ids = {p.patch_id for p in merged} + assert sparse_id not in merged_ids + + # The absorbing neighbor's bounds should cover the sparse tile's area + sparse_global = patch_map[sparse_id].global_bounds_um + absorber = [p for p in merged if p.patch_id != sparse_id] + # At least one neighbor should now have bounds covering the sparse tile's origin + covers_sparse = any( + p.global_bounds_um.x_min <= sparse_global.x_min + 0.01 + and p.global_bounds_um.y_min <= sparse_global.y_min + 0.01 + for p in absorber + ) + assert covers_sparse, "No merged tile covers the sparse tile's region" + + def test_merge_preserves_all_transcripts(self, tmp_path: Path): + """After merging, divide_transcripts with merged grid loses no transcripts.""" + image_px = 1000 + image_um = image_px * PIXEL_SIZE + + # Create transcripts: sparse in one corner, dense elsewhere + rng = 
np.random.default_rng(33) + n_sparse = 10 + n_dense = 990 + + x_sparse = rng.uniform(0, image_um * 0.1, n_sparse).astype(np.float32) + y_sparse = rng.uniform(0, image_um * 0.1, n_sparse).astype(np.float32) + x_dense = rng.uniform(image_um * 0.3, image_um, n_dense).astype(np.float32) + y_dense = rng.uniform(image_um * 0.3, image_um, n_dense).astype(np.float32) + + n = n_sparse + n_dense + table = pa.table( + { + "transcript_id": pa.array( + [f"tx_{i}" for i in range(n)], type=pa.string() + ), + "cell_id": pa.array(["UNASSIGNED"] * n, type=pa.string()), + "overlaps_nucleus": pa.array([0] * n, type=pa.int32()), + "feature_name": pa.array( + [f"gene_{i % 50}" for i in range(n)], type=pa.string() + ), + "x_location": pa.array( + np.concatenate([x_sparse, x_dense]), type=pa.float32() + ), + "y_location": pa.array( + np.concatenate([y_sparse, y_dense]), type=pa.float32() + ), + "z_location": pa.array( + rng.uniform(0, 10, n).astype(np.float32), type=pa.float32() + ), + "qv": pa.array( + rng.uniform(20, 40, n).astype(np.float32), type=pa.float32() + ), + } + ) + parquet_path = tmp_path / "transcripts.parquet" + pq.write_table(table, str(parquet_path)) + + original_ids = set(table.column("transcript_id").to_pylist()) + output_dir = tmp_path / "output" + + divide_transcripts( + transcripts_path=parquet_path, + output_dir=output_dir, + image_width_px=image_px, + image_height_px=image_px, + tile_width_um=100.0, + overlap_um=10.0, + balanced=False, + pixel_size_um=PIXEL_SIZE, + max_workers=1, + min_transcripts=50, + ) + + with open(output_dir / "patch_grid.json") as f: + metadata = json.load(f) + + found_ids: set[str] = set() + for patch_meta in metadata["patches"]: + patch_parquet = output_dir / patch_meta["patch_id"] / "transcripts.parquet" + if patch_parquet.exists(): + tbl = pq.read_table(str(patch_parquet)) + found_ids.update(tbl.column("transcript_id").to_pylist()) + + missing = original_ids - found_ids + assert len(missing) == 0, ( + f"{len(missing)} transcripts lost 
after merge + divide" + ) + + def test_merge_disabled_with_zero_threshold(self): + """min_transcripts=0 disables merging regardless of transcript counts.""" + patches, image_px, overlap_px = _make_2x2_grid() + + # Put only 1 transcript per tile -- still no merge with threshold=0 + rng = np.random.default_rng(99) + xs, ys = [], [] + for p in patches: + cb = p.core_bounds_um + xs.append(rng.uniform(cb.x_min + 0.1, cb.x_max - 0.1, 1)) + ys.append(rng.uniform(cb.y_min + 0.1, cb.y_max - 0.1, 1)) + + x = np.concatenate(xs) + y = np.concatenate(ys) + + merged, merge_count = merge_sparse_tiles( + patches=patches, + x_coords_um=x, + y_coords_um=y, + overlap_px=overlap_px, + pixel_size_um=PIXEL_SIZE, + image_width_px=image_px, + image_height_px=image_px, + min_transcripts=0, + ) + + assert merge_count == 0 + assert len(merged) == len(patches) + + def test_count_transcripts_per_tile(self): + """Unit test for _count_transcripts_per_tile with known placement.""" + patches, image_px, _ = _make_2x2_grid() + + # Place a distinct number of transcripts (10, 20, 30, 40) in each patch's core + rng = np.random.default_rng(11) + xs, ys = [], [] + expected_per_patch: dict[str, int] = {} + counts_list = [10, 20, 30, 40] + for p, n in zip(patches, counts_list): + cb = p.core_bounds_um + xs.append(rng.uniform(cb.x_min + 0.1, cb.x_max - 0.1, n)) + ys.append(rng.uniform(cb.y_min + 0.1, cb.y_max - 0.1, n)) + expected_per_patch[p.patch_id] = n + + x = np.concatenate(xs) + y = np.concatenate(ys) + + counts = _count_transcripts_per_tile(patches, x, y) + + for pid, expected in expected_per_patch.items(): + assert counts[pid] == expected, ( + f"Patch {pid}: expected {expected}, got {counts[pid]}" + ) + + def test_find_adjacent_patches(self): + """Each tile in a 2x2 grid has exactly 2 neighbors.""" + patches, _, _ = _make_2x2_grid() + adjacency = _find_adjacent_patches(patches) + + # 2x2 grid: each corner tile touches 2 others (horizontal + vertical) + for p in patches: + neighbors = adjacency[p.patch_id] + assert len(neighbors) == 
2, ( + f"Patch {p.patch_id} has {len(neighbors)} neighbors, expected 2: {neighbors}" + ) diff --git a/tests/test_xenium_patch/test_stitch_transcripts.py b/tests/test_xenium_patch/test_stitch_transcripts.py new file mode 100644 index 00000000..5419c897 --- /dev/null +++ b/tests/test_xenium_patch/test_stitch_transcripts.py @@ -0,0 +1,865 @@ +"""Tests for stitch_transcripts.py — sopa-based stitching.""" + +import importlib.util +import json +import sys +from pathlib import Path + +import pyarrow as pa +import pyarrow.csv as pa_csv +import pytest +from shapely.geometry import Polygon, mapping + +# --------------------------------------------------------------------------- +# Import the standalone script from module resources +# --------------------------------------------------------------------------- + +_SCRIPT = ( + Path(__file__).resolve().parents[2] + / "modules/local/xenium_patch/stitch/resources/usr/bin/stitch_transcripts.py" +) +_spec = importlib.util.spec_from_file_location("stitch_transcripts", _SCRIPT) +_mod = importlib.util.module_from_spec(_spec) +sys.modules["stitch_transcripts"] = _mod +_spec.loader.exec_module(_mod) + +from stitch_transcripts import ( # noqa: E402 + Bounds, + PatchGridMetadata, + PatchInfo, + _normalize_geometry_collection, + read_geojson, + stitch_transcript_assignments, + transform_polygons, +) + +PIXEL_SIZE = 0.2125 + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_patch_info( + patch_id: str, + row: int, + col: int, + global_x: tuple[float, float], + global_y: tuple[float, float], + core_x: tuple[float, float], + core_y: tuple[float, float], +) -> PatchInfo: + """Create a PatchInfo with bounds in both pixel and micron space.""" + return PatchInfo( + patch_id=patch_id, + row=row, + col=col, + global_bounds_px=Bounds( + global_x[0] / PIXEL_SIZE, + global_x[1] / PIXEL_SIZE, + global_y[0] / 
PIXEL_SIZE, + global_y[1] / PIXEL_SIZE, + ), + global_bounds_um=Bounds(global_x[0], global_x[1], global_y[0], global_y[1]), + core_bounds_px=Bounds( + core_x[0] / PIXEL_SIZE, + core_x[1] / PIXEL_SIZE, + core_y[0] / PIXEL_SIZE, + core_y[1] / PIXEL_SIZE, + ), + core_bounds_um=Bounds(core_x[0], core_x[1], core_y[0], core_y[1]), + ) + + +def _make_metadata(patches: list[PatchInfo]) -> PatchGridMetadata: + """Create minimal PatchGridMetadata.""" + return PatchGridMetadata( + version="1.0", + bundle_path="", + image_height_px=10000, + image_width_px=10000, + pixel_size_um=PIXEL_SIZE, + transcript_extent_um=Bounds(0.0, 2125.0, 0.0, 2125.0), + grid_rows=1, + grid_cols=2, + overlap_um=50.0, + overlap_px=236, + patches=patches, + ) + + +def _write_grid_json(metadata: PatchGridMetadata, output_path: Path) -> None: + """Serialize PatchGridMetadata to JSON (matching the format load_grid_metadata expects).""" + + def bounds_dict(b: Bounds) -> dict: + return {"x_min": b.x_min, "x_max": b.x_max, "y_min": b.y_min, "y_max": b.y_max} + + data = { + "version": metadata.version, + "bundle_path": metadata.bundle_path, + "image_height_px": metadata.image_height_px, + "image_width_px": metadata.image_width_px, + "pixel_size_um": metadata.pixel_size_um, + "transcript_extent_um": bounds_dict(metadata.transcript_extent_um), + "grid_rows": metadata.grid_rows, + "grid_cols": metadata.grid_cols, + "overlap_um": metadata.overlap_um, + "overlap_px": metadata.overlap_px, + "grid_type": metadata.grid_type, + "patches": [ + { + "patch_id": p.patch_id, + "row": p.row, + "col": p.col, + "global_bounds_px": bounds_dict(p.global_bounds_px), + "global_bounds_um": bounds_dict(p.global_bounds_um), + "core_bounds_px": bounds_dict(p.core_bounds_px), + "core_bounds_um": bounds_dict(p.core_bounds_um), + } + for p in metadata.patches + ], + } + output_path.parent.mkdir(parents=True, exist_ok=True) + with open(output_path, "w") as f: + json.dump(data, f, indent=2) + + +def _write_patch_csv( + patch_dir: Path, 
rows: list[dict], filename: str = "segmentation.csv" +) -> None: + """Write a Baysor-style CSV.""" + if not rows: + return + cols = list(rows[0].keys()) + arrays = { + col: pa.array([str(r[col]) for r in rows], type=pa.string()) for col in cols + } + table = pa.table(arrays) + patch_dir.mkdir(parents=True, exist_ok=True) + pa_csv.write_csv(table, patch_dir / filename) + + +def _write_patch_geojson( + patch_dir: Path, + cell_polygons: dict[str, Polygon], + filename: str = "segmentation_polygons.json", +) -> None: + """Write a GeoJSON FeatureCollection with cell polygons in local coordinates.""" + features = [] + for cell_id, poly in cell_polygons.items(): + features.append( + { + "type": "Feature", + "id": cell_id, + "geometry": mapping(poly), + "properties": {"cell_id": cell_id}, + } + ) + geojson = {"type": "FeatureCollection", "features": features} + patch_dir.mkdir(parents=True, exist_ok=True) + with open(patch_dir / filename, "w") as f: + json.dump(geojson, f) + + +# --------------------------------------------------------------------------- +# Stitch tests +# --------------------------------------------------------------------------- + + +class TestStitchBasic: + def test_stitch_basic(self, tmp_path: Path): + """Create 2 patches with non-overlapping cells, verify merged output.""" + # Patch 0: core [0,500) x [0,1000), global [0,525) x [0,1000) + # Patch 1: core [500,1000) x [0,1000), global [475,1000) x [0,1000) + p0 = _make_patch_info( + "patch_0", + 0, + 0, + global_x=(0.0, 525.0), + global_y=(0.0, 1000.0), + core_x=(0.0, 500.0), + core_y=(0.0, 1000.0), + ) + p1 = _make_patch_info( + "patch_1", + 0, + 1, + global_x=(475.0, 1000.0), + global_y=(0.0, 1000.0), + core_x=(500.0, 1000.0), + core_y=(0.0, 1000.0), + ) + metadata = _make_metadata([p0, p1]) + + patches_dir = tmp_path / "patches" + _write_grid_json(metadata, patches_dir / "patch_grid.json") + + # Patch 0: cell at (100, 100) local -> (100, 100) global + _write_patch_csv( + patches_dir / "patch_0", + [ + 
{ + "transcript_id": "tx_1", + "x": "100.0", + "y": "100.0", + "gene": "A", + "cell": "cell_1", + "is_noise": "0", + }, + { + "transcript_id": "tx_2", + "x": "200.0", + "y": "200.0", + "gene": "B", + "cell": "cell_1", + "is_noise": "0", + }, + ], + ) + _write_patch_geojson( + patches_dir / "patch_0", + {"cell_1": Polygon([(50, 50), (250, 50), (250, 250), (50, 250)])}, + ) + + # Patch 1: cell at (100, 100) local -> (575, 100) global + _write_patch_csv( + patches_dir / "patch_1", + [ + { + "transcript_id": "tx_3", + "x": "100.0", + "y": "100.0", + "gene": "C", + "cell": "cell_1", + "is_noise": "0", + }, + ], + ) + _write_patch_geojson( + patches_dir / "patch_1", + {"cell_1": Polygon([(50, 50), (200, 50), (200, 200), (50, 200)])}, + ) + + output_dir = tmp_path / "output" + stitch_transcript_assignments( + patches_dir=patches_dir, + output_dir=output_dir, + max_workers=1, + ) + + csv_out = output_dir / "xr-transcript-metadata.csv" + assert csv_out.exists() + + geo_out = output_dir / "xr-cell-polygons.geojson" + assert geo_out.exists() + + # Read CSV and verify transcripts present + merged = pa_csv.read_csv(csv_out) + assert merged.num_rows >= 3 + + def test_stitch_cell_id_sequential(self, tmp_path: Path): + """Verify global IDs are cell-1, cell-2, ...""" + p0 = _make_patch_info( + "patch_0", + 0, + 0, + global_x=(0.0, 525.0), + global_y=(0.0, 1000.0), + core_x=(0.0, 500.0), + core_y=(0.0, 1000.0), + ) + p1 = _make_patch_info( + "patch_1", + 0, + 1, + global_x=(475.0, 1000.0), + global_y=(0.0, 1000.0), + core_x=(500.0, 1000.0), + core_y=(0.0, 1000.0), + ) + metadata = _make_metadata([p0, p1]) + + patches_dir = tmp_path / "patches" + _write_grid_json(metadata, patches_dir / "patch_grid.json") + + _write_patch_csv( + patches_dir / "patch_0", + [ + { + "transcript_id": "tx_1", + "x": "100.0", + "y": "100.0", + "gene": "A", + "cell": "cell_1", + "is_noise": "0", + }, + ], + ) + _write_patch_geojson( + patches_dir / "patch_0", + {"cell_1": Polygon([(50, 50), (250, 50), (250, 
250), (50, 250)])}, + ) + + _write_patch_csv( + patches_dir / "patch_1", + [ + { + "transcript_id": "tx_2", + "x": "100.0", + "y": "100.0", + "gene": "B", + "cell": "cell_1", + "is_noise": "0", + }, + ], + ) + _write_patch_geojson( + patches_dir / "patch_1", + {"cell_1": Polygon([(50, 50), (200, 50), (200, 200), (50, 200)])}, + ) + + output_dir = tmp_path / "output" + stitch_transcript_assignments( + patches_dir=patches_dir, + output_dir=output_dir, + max_workers=1, + ) + + geo_out = output_dir / "xr-cell-polygons.geojson" + with open(geo_out) as f: + geo = json.load(f) + + cell_ids = [feat["id"] for feat in geo["features"]] + for cid in cell_ids: + assert cid.startswith("cell-"), f"Cell ID {cid} not in cell-N format" + + # IDs should be sequential starting at 1 + numbers = sorted(int(cid.split("-")[1]) for cid in cell_ids) + assert numbers == list(range(1, len(cell_ids) + 1)) + + def test_stitch_transcript_dedup(self, tmp_path: Path): + """Same transcript in 2 patches: assigned wins over noise.""" + p0 = _make_patch_info( + "patch_0", + 0, + 0, + global_x=(0.0, 600.0), + global_y=(0.0, 1000.0), + core_x=(0.0, 500.0), + core_y=(0.0, 1000.0), + ) + p1 = _make_patch_info( + "patch_1", + 0, + 1, + global_x=(400.0, 1000.0), + global_y=(0.0, 1000.0), + core_x=(500.0, 1000.0), + core_y=(0.0, 1000.0), + ) + metadata = _make_metadata([p0, p1]) + + patches_dir = tmp_path / "patches" + _write_grid_json(metadata, patches_dir / "patch_grid.json") + + # tx_dup appears in both patches. In patch_0 it's assigned, in patch_1 it's noise. 
+ _write_patch_csv( + patches_dir / "patch_0", + [ + { + "transcript_id": "tx_dup", + "x": "450.0", + "y": "100.0", + "gene": "A", + "cell": "cell_1", + "is_noise": "0", + }, + { + "transcript_id": "tx_only0", + "x": "100.0", + "y": "100.0", + "gene": "B", + "cell": "cell_1", + "is_noise": "0", + }, + ], + ) + _write_patch_geojson( + patches_dir / "patch_0", + {"cell_1": Polygon([(50, 50), (500, 50), (500, 250), (50, 250)])}, + ) + + _write_patch_csv( + patches_dir / "patch_1", + [ + { + "transcript_id": "tx_dup", + "x": "50.0", + "y": "100.0", + "gene": "A", + "cell": "", + "is_noise": "1", + }, + { + "transcript_id": "tx_only1", + "x": "200.0", + "y": "200.0", + "gene": "C", + "cell": "cell_1", + "is_noise": "0", + }, + ], + ) + _write_patch_geojson( + patches_dir / "patch_1", + {"cell_1": Polygon([(150, 50), (350, 50), (350, 350), (150, 350)])}, + ) + + output_dir = tmp_path / "output" + stitch_transcript_assignments( + patches_dir=patches_dir, + output_dir=output_dir, + max_workers=1, + ) + + csv_out = output_dir / "xr-transcript-metadata.csv" + merged = pa_csv.read_csv(csv_out) + tid_col = merged.column("transcript_id").to_pylist() + cell_col = merged.column("cell").to_pylist() + + # tx_dup should appear exactly once + dup_count = tid_col.count("tx_dup") + assert dup_count == 1, f"tx_dup appears {dup_count} times, expected 1" + + # The kept version should be assigned (non-empty cell) + dup_idx = tid_col.index("tx_dup") + assert cell_col[dup_idx] != "", "tx_dup should be assigned, not noise" + + def test_stitch_noise_spatial_reassignment(self, tmp_path: Path): + """Noise transcript inside a resolved cell polygon gets assigned.""" + p0 = _make_patch_info( + "patch_0", + 0, + 0, + global_x=(0.0, 600.0), + global_y=(0.0, 1000.0), + core_x=(0.0, 600.0), + core_y=(0.0, 1000.0), + ) + metadata = _make_metadata([p0]) + + patches_dir = tmp_path / "patches" + _write_grid_json(metadata, patches_dir / "patch_grid.json") + + # tx_noise is at (150, 150) local -> (150, 150) 
global, inside the cell polygon + _write_patch_csv( + patches_dir / "patch_0", + [ + { + "transcript_id": "tx_assigned", + "x": "100.0", + "y": "100.0", + "gene": "A", + "cell": "cell_1", + "is_noise": "0", + }, + { + "transcript_id": "tx_noise", + "x": "150.0", + "y": "150.0", + "gene": "B", + "cell": "", + "is_noise": "1", + }, + ], + ) + # Cell polygon covers (50,50) to (250,250) in local coords -> global same since origin is 0 + _write_patch_geojson( + patches_dir / "patch_0", + {"cell_1": Polygon([(50, 50), (250, 50), (250, 250), (50, 250)])}, + ) + + output_dir = tmp_path / "output" + stitch_transcript_assignments( + patches_dir=patches_dir, + output_dir=output_dir, + max_workers=1, + ) + + csv_out = output_dir / "xr-transcript-metadata.csv" + merged = pa_csv.read_csv(csv_out) + tid_col = merged.column("transcript_id").to_pylist() + cell_col = merged.column("cell").to_pylist() + + noise_idx = tid_col.index("tx_noise") + assert cell_col[noise_idx] != "", ( + "tx_noise should be spatially reassigned to a cell" + ) + + def test_stitch_geojson_not_found(self, tmp_path: Path): + """When GeoJSON doesn't exist, stitch should still work (transcript-only).""" + p0 = _make_patch_info( + "patch_0", + 0, + 0, + global_x=(0.0, 1000.0), + global_y=(0.0, 1000.0), + core_x=(0.0, 1000.0), + core_y=(0.0, 1000.0), + ) + metadata = _make_metadata([p0]) + + patches_dir = tmp_path / "patches" + _write_grid_json(metadata, patches_dir / "patch_grid.json") + + # Write CSV but no GeoJSON + _write_patch_csv( + patches_dir / "patch_0", + [ + { + "transcript_id": "tx_1", + "x": "100.0", + "y": "100.0", + "gene": "A", + "cell": "cell_1", + "is_noise": "0", + }, + ], + ) + + output_dir = tmp_path / "output" + # Should not raise + stitch_transcript_assignments( + patches_dir=patches_dir, + output_dir=output_dir, + max_workers=1, + ) + + # No geojson output (no polygons to write) + geo_out = output_dir / "xr-cell-polygons.geojson" + assert not geo_out.exists() + + +# 
--------------------------------------------------------------------------- +# Helper function tests +# --------------------------------------------------------------------------- + + +class TestReadGeoJSON: + def test_read_geojson_feature_collection(self, tmp_path: Path): + """Standard FeatureCollection is returned as-is.""" + geojson = { + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "id": "cell_1", + "geometry": mapping(Polygon([(0, 0), (10, 0), (10, 10), (0, 10)])), + "properties": {"cell_id": "cell_1"}, + } + ], + } + path = tmp_path / "test.geojson" + with open(path, "w") as f: + json.dump(geojson, f) + + result = read_geojson(path) + assert result["type"] == "FeatureCollection" + assert len(result["features"]) == 1 + assert result["features"][0]["id"] == "cell_1" + + def test_read_geojson_geometry_collection(self, tmp_path: Path): + """proseg's GeometryCollection format is normalized to FeatureCollection.""" + geojson = { + "type": "GeometryCollection", + "geometries": [ + { + "type": "Polygon", + "coordinates": [[[0, 0], [10, 0], [10, 10], [0, 10], [0, 0]]], + "cell": 1, + }, + { + "type": "Polygon", + "coordinates": [[[20, 20], [30, 20], [30, 30], [20, 30], [20, 20]]], + "cell": 2, + }, + ], + } + path = tmp_path / "proseg.geojson" + with open(path, "w") as f: + json.dump(geojson, f) + + result = read_geojson(path) + assert result["type"] == "FeatureCollection" + assert len(result["features"]) == 2 + assert result["features"][0]["id"] == "1" + assert result["features"][1]["id"] == "2" + # geometry should not contain the 'cell' key + assert "cell" not in result["features"][0]["geometry"] + + +class TestTransformPolygons: + def test_transform_polygons_offset(self): + """Verify coordinate shift by (offset_x, offset_y).""" + geojson = { + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "id": "cell_1", + "geometry": mapping(Polygon([(0, 0), (10, 0), (10, 10), (0, 10)])), + "properties": {"cell_id": "cell_1"}, + 
} + ], + } + + shifted = transform_polygons(geojson, offset_x=100.0, offset_y=200.0) + + assert shifted["type"] == "FeatureCollection" + assert len(shifted["features"]) == 1 + + coords = shifted["features"][0]["geometry"]["coordinates"][0] + xs = [c[0] for c in coords] + ys = [c[1] for c in coords] + + assert min(xs) == pytest.approx(100.0) + assert max(xs) == pytest.approx(110.0) + assert min(ys) == pytest.approx(200.0) + assert max(ys) == pytest.approx(210.0) + + +class TestNormalizeGeometryCollection: + def test_empty_geometry_collection(self): + """Empty GeometryCollection returns empty FeatureCollection.""" + result = _normalize_geometry_collection( + {"type": "GeometryCollection", "geometries": []} + ) + assert result["type"] == "FeatureCollection" + assert result["features"] == [] + + def test_string_cell_id_passthrough(self): + """Non-integer cell key is passed through as string.""" + geojson = { + "type": "GeometryCollection", + "geometries": [ + { + "type": "Polygon", + "coordinates": [[[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]], + "cell": "custom-id", + } + ], + } + result = _normalize_geometry_collection(geojson) + assert result["features"][0]["id"] == "custom-id" + + +# --------------------------------------------------------------------------- +# Baysor native format tests (empty cell column, mismatched IDs) +# --------------------------------------------------------------------------- + + +class TestBaysorNativeFormat: + def test_baysor_empty_cell_column(self, tmp_path: Path): + """Baysor native output: cell column is empty, GeoJSON has integer IDs. + + This is the core bug that spatial containment fixes. Previously, the + ID-matching approach would skip all polygons because no CSV cell values + matched the GeoJSON cell IDs. 
+ """ + p0 = _make_patch_info( + "patch_0", + 0, + 0, + global_x=(0.0, 1000.0), + global_y=(0.0, 1000.0), + core_x=(0.0, 1000.0), + core_y=(0.0, 1000.0), + ) + metadata = _make_metadata([p0]) + + patches_dir = tmp_path / "patches" + _write_grid_json(metadata, patches_dir / "patch_grid.json") + + # Baysor CSV: cell column is EMPTY, cell_id has string labels + _write_patch_csv( + patches_dir / "patch_0", + [ + { + "transcript_id": "tx_1", + "cell_id": "higeahke-1", + "x": "100.0", + "y": "100.0", + "gene": "GeneA", + "cell": "", + "is_noise": "0", + }, + { + "transcript_id": "tx_2", + "cell_id": "higeahke-1", + "x": "150.0", + "y": "150.0", + "gene": "GeneB", + "cell": "", + "is_noise": "0", + }, + { + "transcript_id": "tx_3", + "cell_id": "", + "x": "800.0", + "y": "800.0", + "gene": "GeneC", + "cell": "", + "is_noise": "1", + }, + ], + ) + + # GeoJSON: GeometryCollection with integer cell keys (proseg format) + # Polygon covers (50,50)-(250,250), so tx_1 and tx_2 are inside, tx_3 is outside + geojson = { + "type": "GeometryCollection", + "geometries": [ + { + "type": "Polygon", + "coordinates": [ + [[50, 50], [250, 50], [250, 250], [50, 250], [50, 50]] + ], + "cell": 4986, + } + ], + } + patch_dir = patches_dir / "patch_0" + patch_dir.mkdir(parents=True, exist_ok=True) + with open(patch_dir / "segmentation_polygons.json", "w") as f: + json.dump(geojson, f) + + output_dir = tmp_path / "output" + stitch_transcript_assignments( + patches_dir=patches_dir, + output_dir=output_dir, + max_workers=1, + ) + + csv_out = output_dir / "xr-transcript-metadata.csv" + assert csv_out.exists(), "CSV output should be written" + + geo_out = output_dir / "xr-cell-polygons.geojson" + assert geo_out.exists(), "GeoJSON output should be written" + + merged = pa_csv.read_csv(csv_out) + tid_col = merged.column("transcript_id").to_pylist() + cell_col = merged.column("cell").to_pylist() + + # tx_1 and tx_2 should be assigned to a cell (spatially inside polygon) + for tx_id in ["tx_1", 
"tx_2"]: + idx = tid_col.index(tx_id) + assert cell_col[idx] != "", ( + f"{tx_id} should be assigned via spatial containment" + ) + assert cell_col[idx].startswith("cell-"), ( + f"{tx_id} should have global ID format" + ) + + # tx_3 should be noise (outside polygon) + tx3_idx = tid_col.index("tx_3") + assert cell_col[tx3_idx] == "", "tx_3 should remain noise (outside polygon)" + + def test_baysor_two_patches_empty_cell(self, tmp_path: Path): + """Two patches with Baysor native format: spatial assignment across patches.""" + p0 = _make_patch_info( + "patch_0", + 0, + 0, + global_x=(0.0, 525.0), + global_y=(0.0, 1000.0), + core_x=(0.0, 500.0), + core_y=(0.0, 1000.0), + ) + p1 = _make_patch_info( + "patch_1", + 0, + 1, + global_x=(475.0, 1000.0), + global_y=(0.0, 1000.0), + core_x=(500.0, 1000.0), + core_y=(0.0, 1000.0), + ) + metadata = _make_metadata([p0, p1]) + + patches_dir = tmp_path / "patches" + _write_grid_json(metadata, patches_dir / "patch_grid.json") + + # Patch 0: cell column empty, transcript at (100,100) + _write_patch_csv( + patches_dir / "patch_0", + [ + { + "transcript_id": "tx_1", + "cell_id": "abc-1", + "x": "100.0", + "y": "100.0", + "gene": "A", + "cell": "", + "is_noise": "0", + }, + ], + ) + # Polygon at (50,50)-(250,250) in local coords + _write_patch_geojson( + patches_dir / "patch_0", + {"anything": Polygon([(50, 50), (250, 50), (250, 250), (50, 250)])}, + ) + + # Patch 1: cell column empty, transcript at (100,100) local -> (575,100) global + _write_patch_csv( + patches_dir / "patch_1", + [ + { + "transcript_id": "tx_2", + "cell_id": "xyz-1", + "x": "100.0", + "y": "100.0", + "gene": "B", + "cell": "", + "is_noise": "0", + }, + ], + ) + _write_patch_geojson( + patches_dir / "patch_1", + {"whatever": Polygon([(50, 50), (200, 50), (200, 200), (50, 200)])}, + ) + + output_dir = tmp_path / "output" + stitch_transcript_assignments( + patches_dir=patches_dir, + output_dir=output_dir, + max_workers=1, + ) + + csv_out = output_dir / 
"xr-transcript-metadata.csv" + assert csv_out.exists() + + merged = pa_csv.read_csv(csv_out) + tid_col = merged.column("transcript_id").to_pylist() + cell_col = merged.column("cell").to_pylist() + + # Both transcripts should be assigned + for tx_id in ["tx_1", "tx_2"]: + idx = tid_col.index(tx_id) + assert cell_col[idx] != "", f"{tx_id} should be assigned" + assert cell_col[idx].startswith("cell-") + + # They should be in different cells + tx1_cell = cell_col[tid_col.index("tx_1")] + tx2_cell = cell_col[tid_col.index("tx_2")] + assert tx1_cell != tx2_cell, ( + "Transcripts in different patches should have different cells" + ) + + geo_out = output_dir / "xr-cell-polygons.geojson" + assert geo_out.exists() + with open(geo_out) as f: + geo = json.load(f) + assert len(geo["features"]) == 2 diff --git a/workflows/spatialaxe.nf b/workflows/spatialaxe.nf new file mode 100644 index 00000000..8e11f058 --- /dev/null +++ b/workflows/spatialaxe.nf @@ -0,0 +1,794 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// multiqc +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { MULTIQC as MULTIQC_PRE_XR_RUN } from '../modules/nf-core/multiqc/main' +include { MULTIQC as MULTIQC_POST_XR_RUN } from '../modules/nf-core/multiqc/main' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' + +// nf-core functionality +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_spatialaxe_pipeline' +include { paramsSummaryMap } from 'plugin/nf-schema' + +// nf-core modules +include { UNTAR } from '../modules/nf-core/untar/main' + +// coordinate-based segmentation subworklfows +include { SEGGER_CREATE_TRAIN_PREDICT } from 
'../subworkflows/local/segger_create_train_predict/main' +include { PROSEG_PRESET_PROSEG2BAYSOR } from '../subworkflows/local/proseg_preset_proseg2baysor/main' +include { PROSEG_PRESET_PROSEG2BAYSOR_TILED } from '../subworkflows/local/proseg_preset_proseg2baysor_tiled/main' +include { BAYSOR_GENERATE_PREVIEW } from '../subworkflows/local/baysor_generate_preview/main' +include { BAYSOR_RUN_TRANSCRIPTS_PARQUET } from '../subworkflows/local/baysor_run_transcripts_parquet/main' + +// image-based segmentation subworkflows +include { BAYSOR_RUN_PRIOR_SEGMENTATION_MASK } from '../subworkflows/local/baysor_run_prior_segmentation_mask/main' +include { CELLPOSE_RESOLIFT_MORPHOLOGY_OME_TIF } from '../subworkflows/local/cellpose_resolift_morphology_ome_tif/main' +include { CELLPOSE_BAYSOR_IMPORT_SEGMENTATION } from '../subworkflows/local/cellpose_baysor_import_segmentation/main' +include { STARDIST_RESOLIFT_MORPHOLOGY_OME_TIF } from '../subworkflows/local/stardist_resolift_morphology_ome_tif/main' +include { XENIUMRANGER_RESEGMENT_MORPHOLOGY_OME_TIF } from '../subworkflows/local/xeniumranger_resegment_morphology_ome_tif/main' + +// segmentation-free subworkflows +include { BAYSOR_GENERATE_SEGFREE } from '../subworkflows/local/baysor_generate_segfree/main' +include { FICTURE_PREPROCESS_MODEL } from '../subworkflows/local/ficture_preprocess_model/main' + +// xeniumranger subworkflows +include { XENIUMRANGER_RELABEL_RESEGMENT } from '../subworkflows/local/xeniumranger_relabel_resegment/main' +include { XENIUMRANGER_IMPORT_SEGMENTATION_REDEFINE_BUNDLE } from '../subworkflows/local/xeniumranger_import_segmentation_redefine_bundle/main' + +// spatialdata subworkflows +include { SPATIALDATA_WRITE_META_MERGE } from '../subworkflows/local/spatialdata_write_meta_merge/main' + +// qc layer subworkflows +include { OPT_FLIP_TRACK_STAT } from '../subworkflows/local/opt_flip_track_stat/main' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN
MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow SPATIALAXE { + take: + ch_samplesheet // channel: samplesheet read in from --input + alignment_csv + baysor_config + baysor_prior + baysor_scale + baysor_tiling + baysor_tiling_scale + buffer_samples + buffer_size + cell_segmentation_only + cellpose_downscale + cellpose_model + expansion_distance + features + gene_panel + gene_synonyms + max_x + max_y + method + min_qv + min_x + min_y + mode + multiqc_config + multiqc_logo + multiqc_methods_description + nucleus_segmentation_only + offtarget_probe_tracking + outdir + probes_fasta + qupath_polygons + reference_annotations + relabel_genes + run_qc + segger_model + segmentation_mask + sharpen_tiff + stardist_nuclei_model + tiling + xeniumranger_only + + main: + + /* + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SPATIALAXE - GENERATE INPUT CHANNELS + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + + ch_versions = channel.empty() + + ch_input = channel.empty() + ch_config = channel.empty() + ch_features = channel.value([]) + ch_raw_bundle = channel.empty() + ch_gene_panel = channel.empty() + ch_qc_reports = channel.empty() + ch_bundle_path = channel.empty() + ch_preview_html = channel.empty() + ch_exp_metadata = channel.empty() + ch_gene_synonyms = channel.empty() + ch_multiqc_files = channel.empty() + ch_multiqc_report = channel.empty() + ch_qupath_polygons = channel.empty() + ch_morphology_image = channel.empty() + ch_redefined_bundle = channel.empty() + ch_coordinate_space = channel.empty() + ch_panel_probes_fasta = channel.empty() + ch_transcripts_file = channel.empty() + ch_reference_annotations = channel.empty() + ch_multiqc_pre_xr_report = channel.empty() + ch_multiqc_post_xr_report = channel.empty() + + + /* + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + 
SPATIALAXE - DATA STAGING + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + + if (workflow.profile.contains('test')) { + + // get sample, xenium bundle and image path + ch_input_untar = ch_samplesheet.map { meta, bundle, _image -> + return [meta, bundle] + } + + // get testdata + UNTAR(ch_input_untar) + + ch_untar_outs = UNTAR.out.untar.map { meta, bundle -> + return [meta, bundle.toString()] + } + + ch_samplesheet + .combine(ch_untar_outs, by: 0) + .map { meta, _url, image, test_bundle -> + return [meta, test_bundle, image] + } + .set { ch_input } + } + else { + // for all other profile runs + + // check if samples are buffered + if (buffer_samples) { + ch_input = ch_samplesheet.buffer(size: buffer_size).map + { buffered_sample -> + def (meta, bundle, tif) = buffered_sample[0] + tuple(meta, bundle, tif) + } + } + else { + ch_input = ch_samplesheet + } + } + + // validate xenium bundle POST-staging — works uniformly for production + // directories and for tarball inputs that the UNTAR step has just extracted + def bundle_required_files = [ + "cell_boundaries.csv.gz", + "cell_boundaries.parquet", + "cell_feature_matrix.h5", + "cell_feature_matrix.zarr.zip", + "cells.csv.gz", + "cells.parquet", + "cells.zarr.zip", + "experiment.xenium", + "gene_panel.json", + "metrics_summary.csv", + "morphology.ome.tif", + "morphology_focus/", + "nucleus_boundaries.csv.gz", + "nucleus_boundaries.parquet", + "transcripts.parquet", + "transcripts.zarr.zip", + ] + def bundle_optional_files = [ + "analysis.tar.gz", + "analysis.zarr.zip", + "analysis_summary.html", + ] + + // path to bundle input + ch_bundle_path = ch_input.map { meta, bundle, _image -> + + def bundle_path = file(bundle) + if( !bundle_path.exists() ) { + error("❌ Xenium bundle does not exist: ${bundle}") + } + + def missing_required = bundle_required_files.findAll { check -> !file("${bundle_path}/${check}").exists() } + if (missing_required) { + error("❌ Missing required 
file(s) in xenium bundle '${bundle}': ${missing_required}") + } + + def missing_optional = bundle_optional_files.findAll { check -> !file("${bundle_path}/${check}").exists() } + if (missing_optional) { + log.warn("⚠️ Missing optional file(s) in xenium bundle '${bundle}': ${missing_optional}") + } + + log.info("✅ Xenium bundle validated: ${bundle}") + return [meta, bundle] + } + + // get transcript.parquet from the xenium bundle + ch_transcripts_file = ch_input.map { meta, bundle, _image -> + def transcripts_parquet = file( + bundle.toString().replaceFirst(/\/$/, '') + "/transcripts.parquet", + checkIfExists: true + ) + return [meta, transcripts_parquet] + } + + // get morphology focus image from the xenium bundle (single 2D plane) + // supports all Xenium versions: + // v2/v3: morphology_focus/morphology_focus_0000.ome.tif + // v4+: morphology_focus/ch0000_dapi.ome.tif + // v1.x: morphology_focus.ome.tif (single file at bundle root) + // fallback: morphology.ome.tif (multi-Z stack, not ideal for Cellpose) + ch_morphology_image = ch_input.map { meta, bundle, image -> + def morphology_img + if (image) { + morphology_img = file(image) + } else { + def bundle_path = bundle.toString().replaceFirst(/\/$/, '') + def focus_v3 = file("${bundle_path}/morphology_focus/morphology_focus_0000.ome.tif") + def focus_v4 = file("${bundle_path}/morphology_focus/ch0000_dapi.ome.tif") + def focus_v1 = file("${bundle_path}/morphology_focus.ome.tif") + if (focus_v3.exists()) { + morphology_img = focus_v3 + } else if (focus_v4.exists()) { + morphology_img = focus_v4 + } else if (focus_v1.exists()) { + morphology_img = focus_v1 + } else { + morphology_img = file("${bundle_path}/morphology.ome.tif", checkIfExists: true) + } + } + return [meta, morphology_img] + } + + // get experiment metdata - experiment.xenium + ch_exp_metadata = ch_input.map { meta, bundle, _image -> + def exp_metadata = file( + bundle.toString().replaceFirst(/\/$/, '') + "/experiment.xenium", + checkIfExists: true + ) + 
return [meta, exp_metadata] + } + + // get baysor xenium config + ch_config = channel.fromPath( + "${projectDir}/assets/config/xenium.toml", + checkIfExists: true + ) + .flatten() + + // get segmentation mask if provided with --segmentation_mask for the baysor method + if (segmentation_mask) { + ch_segmentation_mask = channel.fromPath( + segmentation_mask, + checkIfExists: true + ) + .flatten() + } + + // get a list of features if provided with the --features for the ficture method + ch_features = features + ? channel.fromPath(features, checkIfExists: true).flatten() + : channel.value([]) + + // get custom cellpose model if provided with the --cellpose_model for the cellpose method + if (cellpose_model) { + cellpose_model = channel.fromPath( + cellpose_model, + checkIfExists: true + ) + .flatten() + } + + // get panel probes fasta for off-target-probe tracking + if (probes_fasta) { + ch_panel_probes_fasta = channel.fromPath( + probes_fasta, + checkIfExists: true + ) + .flatten() + } + + // get reference annotation files (gff,fa) for off-target-probe tracking + if (reference_annotations) { + ch_reference_annotations = channel.fromPath( + "${reference_annotations}/*.{fa,gff}".toString(), + checkIfExists: true + ) + .flatten() + } + + // get gene synonyms for off-target-probe tracking + if (gene_synonyms) { + ch_gene_synonyms = channel.fromPath( + gene_synonyms, + checkIfExists: true + ) + .flatten() + } + + // get qupath polygons + if (qupath_polygons) { + ch_qupath_polygons = channel.fromPath( + "${qupath_polygons}/*.geojson", + checkIfExists: true + ) + .flatten() + } + + // get gene_panel.json if provided with --gene_panel, sets relabel_genes to true + def do_relabel = gene_panel ?
true : relabel_genes + if (gene_panel) { + + def gene_panel_file = file(gene_panel, checkIfExists: true) + ch_gene_panel = ch_input.map { meta, _bundle, _image -> + return [meta, gene_panel_file] + } + } + else { + + // gene panel to use if only --relabel_genes is provided + ch_gene_panel = ch_input.map { meta, bundle, _image -> + def gene_panel_file = file( + bundle.toString().replaceFirst(/\/$/, '') + "/gene_panel.json", + checkIfExists: true + ) + return [meta, gene_panel_file] + } + } + + /* + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SPATIALAXE - RELABEL GENES + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + + // run xr relabel if relabel_genes is true, check if gene_panel.json is provided + if (do_relabel) { + + XENIUMRANGER_RELABEL_RESEGMENT( + ch_bundle_path, + ch_gene_panel, + ) + ch_raw_bundle = XENIUMRANGER_RELABEL_RESEGMENT.out.redefined_bundle + } + else { + ch_raw_bundle = ch_bundle_path + } + + /* + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SPATIALAXE - DATA PREVIEW + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + // run baysor preview if `mode` is 'preview' + if (mode == 'preview') { + + BAYSOR_GENERATE_PREVIEW( + ch_transcripts_file, + ch_config, + ) + ch_preview_html = BAYSOR_GENERATE_PREVIEW.out.preview_html + } + + /* + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SPATIALAXE - XENIUMRANGER LAYER + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + // run only xeniumranger import segmentation with changes xr specific params + if (mode == 'image' && xeniumranger_only) { + + XENIUMRANGER_IMPORT_SEGMENTATION_REDEFINE_BUNDLE( + ch_bundle_path, + alignment_csv, + expansion_distance, + nucleus_segmentation_only, + qupath_polygons, + ) + ch_redefined_bundle =
XENIUMRANGER_IMPORT_SEGMENTATION_REDEFINE_BUNDLE.out.redefined_bundle + ch_coordinate_space = XENIUMRANGER_IMPORT_SEGMENTATION_REDEFINE_BUNDLE.out.coordinate_space + } + + + /* + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SPATIALAXE - IMAGE-BASED SEGMENTATION LAYER + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + if (mode == 'image') { + + // trigger the default image-based workflow if no method is specified + if (!method) { + + CELLPOSE_BAYSOR_IMPORT_SEGMENTATION( + ch_morphology_image, + ch_bundle_path, + ch_transcripts_file, + ch_exp_metadata, + ch_config, + cell_segmentation_only, + cellpose_model, + max_x, + max_y, + min_qv, + min_x, + min_y, + nucleus_segmentation_only, + sharpen_tiff, + stardist_nuclei_model, + ) + ch_redefined_bundle = CELLPOSE_BAYSOR_IMPORT_SEGMENTATION.out.redefined_bundle + ch_coordinate_space = CELLPOSE_BAYSOR_IMPORT_SEGMENTATION.out.coordinate_space + } + + // run xeniumranger resegment with morphology_ome.tif + if (method == 'xeniumranger') { + + XENIUMRANGER_RESEGMENT_MORPHOLOGY_OME_TIF( + ch_bundle_path, + nucleus_segmentation_only, + ) + ch_redefined_bundle = XENIUMRANGER_RESEGMENT_MORPHOLOGY_OME_TIF.out.redefined_bundle + ch_coordinate_space = XENIUMRANGER_RESEGMENT_MORPHOLOGY_OME_TIF.out.coordinate_space + } + + // run baysor run with morphology_ome.tif + if (method == 'baysor') { + + if (segmentation_mask) { + BAYSOR_RUN_PRIOR_SEGMENTATION_MASK( + ch_bundle_path, + ch_transcripts_file, + ch_segmentation_mask, + ch_config, + max_x, + max_y, + min_qv, + min_x, + min_y, + ) + } + ch_redefined_bundle = BAYSOR_RUN_PRIOR_SEGMENTATION_MASK.out.redefined_bundle + ch_coordinate_space = BAYSOR_RUN_PRIOR_SEGMENTATION_MASK.out.coordinate_space + } + + // run cellpose on the morphology_ome.tif + if (method == 'cellpose') { + + CELLPOSE_RESOLIFT_MORPHOLOGY_OME_TIF( + ch_morphology_image, + ch_bundle_path, + cellpose_downscale, + cellpose_model, + 
nucleus_segmentation_only, + sharpen_tiff, + stardist_nuclei_model, + ) + ch_redefined_bundle = CELLPOSE_RESOLIFT_MORPHOLOGY_OME_TIF.out.redefined_bundle + ch_coordinate_space = CELLPOSE_RESOLIFT_MORPHOLOGY_OME_TIF.out.coordinate_space + } + + // run stardist on the morphology_ome.tif + if (method == 'stardist') { + + STARDIST_RESOLIFT_MORPHOLOGY_OME_TIF( + ch_morphology_image, + ch_bundle_path, + sharpen_tiff, + stardist_nuclei_model, + ) + ch_redefined_bundle = STARDIST_RESOLIFT_MORPHOLOGY_OME_TIF.out.redefined_bundle + ch_coordinate_space = STARDIST_RESOLIFT_MORPHOLOGY_OME_TIF.out.coordinate_space + } + } + + /* + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SPATIALAXE - TRANSCRIPT-BASED SEGMENTATION LAYER + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + if (mode == 'coordinate') { + + // run proseg with transcripts.parquet if method = proseg or is not provided (default workflow) + if (!method || method == 'proseg') { + + if (tiling) { + PROSEG_PRESET_PROSEG2BAYSOR_TILED( + ch_bundle_path, + ch_transcripts_file, + ) + ch_redefined_bundle = PROSEG_PRESET_PROSEG2BAYSOR_TILED.out.redefined_bundle + ch_coordinate_space = PROSEG_PRESET_PROSEG2BAYSOR_TILED.out.coordinate_space + } else { + PROSEG_PRESET_PROSEG2BAYSOR( + ch_bundle_path, + ch_transcripts_file, + ) + ch_redefined_bundle = PROSEG_PRESET_PROSEG2BAYSOR.out.redefined_bundle + ch_coordinate_space = PROSEG_PRESET_PROSEG2BAYSOR.out.coordinate_space + } + } + + // run segger with transcripts.parquet + if (method == 'segger') { + + SEGGER_CREATE_TRAIN_PREDICT( + ch_bundle_path, + ch_transcripts_file, + segger_model, + ) + ch_redefined_bundle = SEGGER_CREATE_TRAIN_PREDICT.out.redefined_bundle + ch_coordinate_space = SEGGER_CREATE_TRAIN_PREDICT.out.coordinate_space + } + + // run baysor with transcripts.parquet (unified tiled/non-tiled subworkflow) + if (method == 'baysor') { + + // Image-based prior (cellpose mask) 
requires non-tiled Baysor + if ( baysor_tiling && baysor_prior == 'cellpose' ) { + error "ERROR: baysor_prior='cellpose' (image-based) requires baysor_tiling=false. " + + "For tiled Baysor, use baysor_prior='cells' (column-based)." + } + + ch_prior_mask = channel.empty() + + BAYSOR_RUN_TRANSCRIPTS_PARQUET( + ch_bundle_path, + ch_transcripts_file, + ch_morphology_image, + ch_config, + ch_prior_mask, + baysor_config, + baysor_scale, + baysor_tiling, + baysor_tiling_scale, + max_x, + max_y, + min_qv, + min_x, + min_y, + ) + ch_redefined_bundle = BAYSOR_RUN_TRANSCRIPTS_PARQUET.out.redefined_bundle + ch_coordinate_space = BAYSOR_RUN_TRANSCRIPTS_PARQUET.out.coordinate_space + } + } + + + + /* + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SPATIALAXE - SPATIALDATA / METADATA LAYER + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + + // run spatialdata modules to generate sd objects in image or coordinate mode + if (mode == 'image' || mode == 'coordinate') { + + SPATIALDATA_WRITE_META_MERGE( + ch_bundle_path, + ch_redefined_bundle, + ch_coordinate_space, + cell_segmentation_only, + mode, + nucleus_segmentation_only, + ) + } + + /* + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SPATIALAXE - QC LAYER + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + + // check to run the qc layer + if (mode == 'qc' || run_qc) { + + if (offtarget_probe_tracking) { + + // run off-target probe tracking + OPT_FLIP_TRACK_STAT( + ch_panel_probes_fasta, + ch_reference_annotations, + ch_gene_synonyms, + ) + } + } + + + /* + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SPATIALAXE - SEGMENTATION-FREE LAYER + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + if (mode == 'segfree') { + + // trigger the default segfree workflow if 
no method or if the method is baysor + if (!method || method == 'baysor') { + + BAYSOR_GENERATE_SEGFREE( + ch_transcripts_file, + ch_config, + max_x, + max_y, + min_qv, + min_x, + min_y, + ) + } + + // run ficture with transcripts.parquet + if (method == 'ficture') { + + FICTURE_PREPROCESS_MODEL( + ch_transcripts_file, + ch_features, + features, + ) + } + } + + + + /* + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SPATIALAXE - COLLATE & SAVE SOFTWARE VERSIONS + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + // Collect versions published via topic channels (local modules) + ch_topic_versions = channel.topic('versions') + .map { process, tool, version -> + "\"${process}\":\n ${tool}: ${version}" + } + + softwareVersionsToYAML(ch_versions.mix(ch_topic_versions)) + .collectFile( + storeDir: "${outdir}/pipeline_info", + name: 'nf_core_' + 'spatialaxe_software_' + 'mqc_' + 'versions.yml', + sort: true, + newLine: true, + ) + .set { ch_collated_versions } + + /* + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SPATIALAXE - MultiQC + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + ch_multiqc_config = channel.fromPath( + "${projectDir}/assets/multiqc_config.yml", + checkIfExists: true + ) + + ch_multiqc_custom_config = multiqc_config + ? channel.fromPath(multiqc_config, checkIfExists: true) + : channel.empty() + + ch_multiqc_logo = multiqc_logo + ? 
channel.fromPath(multiqc_logo, checkIfExists: true) + : channel.empty() + + // Combine default and custom configs into a single list for the tuple-based MULTIQC input + ch_multiqc_configs = ch_multiqc_config.mix(ch_multiqc_custom_config).collect() + + summary_params = paramsSummaryMap( + workflow, + parameters_schema: "nextflow_schema.json" + ) + + ch_workflow_summary = channel.value(paramsSummaryMultiqc(summary_params)) + + ch_multiqc_files = ch_multiqc_files.mix( + ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml') + ) + + ch_multiqc_custom_methods_description = multiqc_methods_description + ? file(multiqc_methods_description, checkIfExists: true) + : file("${projectDir}/assets/methods_description_template.yml", checkIfExists: true) + + ch_methods_description = channel.value( + methodsDescriptionText(ch_multiqc_custom_methods_description) + ) + + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + + ch_multiqc_files = ch_multiqc_files.mix( + ch_methods_description.collectFile( + name: 'methods_description_mqc.yaml', + sort: true, + ) + ) + + if (mode == 'image' || mode == 'coordinate') { + + // get path to the raw bundle + ch_multiqc_files = ch_multiqc_files.mix( + ch_bundle_path.map { _meta, bundle -> file(bundle) }.collect().ifEmpty([]) + ) + + MULTIQC_PRE_XR_RUN ( + ch_multiqc_files.collect().map { mqc_files -> [mqc_files] } + .combine(ch_multiqc_configs.map { mqc_configs -> [mqc_configs] }) + .combine(ch_multiqc_logo.toList().map { mqc_logo -> [mqc_logo] }) + .map { files, configs, logo -> + [ [id: 'multiqc_pre_xr'], files, configs, logo ? 
logo[0] : [], [], [] ] + } + ) + ch_multiqc_pre_xr_report = MULTIQC_PRE_XR_RUN.out.report.map { _meta, report -> report }.toList() + + // get path to the redefined bundle + ch_multiqc_files = ch_multiqc_files.mix( + ch_redefined_bundle.map { _meta, bundle -> file(bundle) }.collect().ifEmpty([]) + ) + + MULTIQC_POST_XR_RUN ( + ch_multiqc_files.collect().map { mqc_files -> [mqc_files] } + .combine(ch_multiqc_configs.map { mqc_configs -> [mqc_configs] }) + .combine(ch_multiqc_logo.toList().map { mqc_logo -> [mqc_logo] }) + .map { files, configs, logo -> + [ [id: 'multiqc_post_xr'], files, configs, logo ? logo[0] : [], [], [] ] + } + ) + ch_multiqc_post_xr_report = MULTIQC_POST_XR_RUN.out.report.map { _meta, report -> report }.toList() + + } else { + + // get path to the raw bundle + ch_multiqc_files = ch_multiqc_files.mix( + ch_bundle_path.map { _meta, bundle -> file(bundle) }.collect().ifEmpty([]) + ) + + + // get the qc htmls if qc mode is run + if (mode == 'qc' || run_qc) { + + ch_multiqc_files = ch_multiqc_files.mix( + ch_qc_reports.map { _meta, qc_reports -> qc_reports }.collect().ifEmpty([]) + ) + + } + + + // get the preview html if preview mode is run + if (mode == 'preview') { + + ch_multiqc_files = ch_multiqc_files.mix( + ch_preview_html.map { _meta, preview_html -> preview_html }.collect().ifEmpty([]) + ) + + } + + + MULTIQC ( + ch_multiqc_files.collect().map { mqc_files -> [mqc_files] } + .combine(ch_multiqc_configs.map { mqc_configs -> [mqc_configs] }) + .combine(ch_multiqc_logo.toList().map { mqc_logo -> [mqc_logo] }) + .map { files, configs, logo -> + [ [id: 'multiqc'], files, configs, logo ? 
logo[0] : [], [], [] ] + } + ) + ch_multiqc_report = MULTIQC.out.report.map { _meta, report -> report }.toList() + + } + + emit: + multiqc_pre_xr_report = ch_multiqc_pre_xr_report // channel: /path/to/multiqc_report.html + multiqc_post_xr_report = ch_multiqc_post_xr_report // channel: /path/to/multiqc_report.html + multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/workflows/spatialxe.nf b/workflows/spatialxe.nf deleted file mode 100644 index d9a6a306..00000000 --- a/workflows/spatialxe.nf +++ /dev/null @@ -1,101 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { paramsSummaryMap } from 'plugin/nf-schema' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_spatialxe_pipeline' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN MAIN WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow SPATIALXE { - - take: - ch_samplesheet // channel: samplesheet read in from --input - multiqc_config - multiqc_logo - multiqc_methods_description - outdir - - main: - - def ch_versions = channel.empty() - def ch_multiqc_files = channel.empty() - // - // MODULE: Run FastQC - // - FASTQC(ch_samplesheet) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.map{ _meta, file -> file }) - - // - // Collate and save software versions - // - def topic_versions = 
channel.topic("versions") - .distinct() - .branch { entry -> - versions_file: entry instanceof Path - versions_tuple: true - } - - def topic_versions_string = topic_versions.versions_tuple - .map { process, tool, version -> - [ process[process.lastIndexOf(':')+1..-1], " ${tool}: ${version}" ] - } - .groupTuple(by:0) - .map { process, tool_versions -> - tool_versions.unique().sort() - "${process}:\n${tool_versions.join('\n')}" - } - - def ch_collated_versions = softwareVersionsToYAML(ch_versions.mix(topic_versions.versions_file)) - .mix(topic_versions_string) - .collectFile( - storeDir: "${outdir}/pipeline_info", - name: 'nf_core_' + 'spatialxe_software_' + 'mqc_' + 'versions.yml', - sort: true, - newLine: true - ) - - // - // MODULE: MultiQC - // - ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) - def ch_summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") - def ch_workflow_summary = channel.value(paramsSummaryMultiqc(ch_summary_params)) - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - def ch_multiqc_custom_methods_description = multiqc_methods_description - ? file(multiqc_methods_description, checkIfExists: true) - : file("${projectDir}/assets/methods_description_template.yml", checkIfExists: true) - def ch_methods_description = channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: true)) - MULTIQC( - ch_multiqc_files.flatten().collect().map { files -> - [ - [id: 'spatialxe'], - files, - multiqc_config - ? file(multiqc_config, checkIfExists: true) - : file("${projectDir}/assets/multiqc_config.yml", checkIfExists: true), - multiqc_logo ? 
file(multiqc_logo, checkIfExists: true) : [], - [], - [], - ] - } - ) - emit:multiqc_report = MULTIQC.out.report.map { _meta, report -> [report] }.toList() // channel: /path/to/multiqc_report.html - versions = ch_versions // channel: [ path(versions.yml) ] -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/