Skip to content

Commit 59fc0bb

Browse files
authored
Merge pull request #259 from EBI-Metagenomics/feature/coassembly-schema
Coassembly schema support
2 parents 48c6069 + 53640d5 commit 59fc0bb

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

42 files changed

+852
-186
lines changed

.github/pull_request_template.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
This PR:
2+
*
3+
4+
5+
---
6+
7+
#### Checklist
8+
- The tests are passing on GitHub Actions (checked automatically)
9+
- The test coverage is at least as good as before (checked automatically)
10+
- Any model changes are reflected by migrations (checked automatically)
11+
- [ ] If `.talismanrc` was changed, it does not contain any duplicates (each file appears at most once)
12+
- [ ] The command `task make-dev-data` still works
13+
- [ ] The local docker-compose dev environment still works (`task run`)
14+
- [ ] Any new prefect flows activate Django before importing any models (`from activate_django_first import EMG_CONFIG`)
15+
- [ ] The code style guide in `README.md` has been followed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,5 +29,6 @@ slurm-dev-environment/fs/nfs/ftp/public/databases/metagenomics/mgnify_results/*
2929
genomes/temp/*
3030
slurm-dev-environment/fs/nfs/public/tests/assembly_v6_output/ERP106708/MGYS*
3131
slurm-dev-environment/fs/nfs/public/tests/amplicon_v6_output/dwca/*
32+
.pytest-cache/
3233
.claude/*
3334
CLAUDE.md

.pre-commit-config.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,7 @@ repos:
2424
hooks:
2525
- id: talisman-commit
2626
entry: cmd --githook pre-commit
27+
- repo: https://github.com/ebi-metagenomics/no_talisman_dupes
28+
rev: 'v0.1.0'
29+
hooks:
30+
- id: talisman-no-duplicate-filenames

.talismanrc

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
11
fileignoreconfig:
2-
- filename: slurm-dev-environment/configs/jwt_hs256.key
3-
checksum: cf9887754600f97f20b4afb223bcd666c3b0aa47dab9b067669c24997f398c26
4-
52
- filename: .github/workflows/release_k8s.yml
63
checksum: 0e93196d244417801fd40ad6bfa01f53a6bf04257371378a1dc3e14cbc7a04d8
74

@@ -54,7 +51,7 @@ fileignoreconfig:
5451
checksum: 3afe465b36e4e141124bd84c8459c5aa81332eedf49b691eff22c74df63a0f18
5552

5653
- filename: emgapiv2/settings.py
57-
checksum: ab0fa7cdb6cd720aedd0a4f6bb9e2940187f9de91a39967e93ed4b91ca5050c5
54+
checksum: 4113e9045da277c139530cbf71de334dc83b49a4f93a570b559e694bdd5cb0c8
5855

5956
- filename: emgapiv2/settings_test.py
6057
checksum: ded898400b4eda7e3cf75694af1b66235975a85eb5b8f49f3a2c708163a7aad4
@@ -65,6 +62,12 @@ fileignoreconfig:
6562
- filename: emgapiv2/widgets.py
6663
allowed_patterns: [key]
6764

65+
- filename: pyproject.toml
66+
allowed_patterns: [Config key]
67+
68+
- filename: slurm-dev-environment/configs/jwt_hs256.key
69+
checksum: cf9887754600f97f20b4afb223bcd666c3b0aa47dab9b067669c24997f398c26
70+
6871
- filename: slurm-dev-environment/configs/private-data-nginx.conf
6972
checksum: e3bb31e2a0f97c62ee684dd7124e5b72db364c52088ddd98331310909c6a0493
7073

@@ -155,9 +158,6 @@ fileignoreconfig:
155158
- filename: workflows/flows/update_ena_accession_from_json_flow.py
156159
allowed_patterns: [total_missing_key_or_parse_error]
157160

158-
- filename: workflows/flows/upload_assembly.py
159-
checksum: f82f92058dabd50b9b6caf557d4eadae227ec4f6a0233135653251c20cf3c64b
160-
161161
- filename: workflows/prefect_utils/env_context.py
162162
checksum: 8f5e11c7e0038bc4a648dc61ec657a679356857f77e557f744e2dd769ad88def
163163
ignore_detectors: [filename]
@@ -186,9 +186,6 @@ fileignoreconfig:
186186
- filename: workflows/data_io_utils/schemas/base.py
187187
checksum: 27b03a470da785c58f8294d2aa0aa3ef5f4e5ec5cffd97984ab446a5b8a9f337
188188

189-
- filename: Taskfile.yaml
190-
checksum: 592e19dbbafe04c136aa73a4921ac04b2d0473e654416b08d09e81d7544b8b09
191-
192189
- filename: workflows/data_io_utils/schemas/assembly.py
193190
checksum: 12b45869a636431bb6e1bfe193f02f301a6b484a0c4503bc793f5450c36c932a
194191

Taskfile.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ tasks:
1717
cmds:
1818
- docker compose exec -e PREFECT_CLIENT_RETRY_EXTRA_CODES=0 app python manage.py prefectcli work-pool create --type {{.TYPE}} {{.CLI_ARGS}} {{.NAME}}
1919
vars:
20-
TYPE: '{{.TYPE | default "process"}}'
20+
TYPE: '{{.TYPE | default "slurm"}}'
2121
NAME: '{{.NAME | default "slurm"}}'
2222

2323
deploy-flow:
@@ -82,7 +82,7 @@ tasks:
8282
interactive: true
8383

8484
test:
85-
desc: "Run pytest tests. E.g. `task test` or `task test -- -k study`"
85+
desc: "Run pytest tests. E.g. `task test` or `task test -- -k study` (for test selection), or `task test -- --lf` (for last failures only)"
8686
cmds:
8787
- docker compose run --entrypoint /bin/bash app -c "pytest {{.CLI_ARGS}}"
8888

analyses/admin/assembly.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ class StudyFilterForAssembly(StudyFilter):
4343
]
4444
search_fields = [
4545
"id",
46-
"run__ena_accessions",
46+
"runs__ena_accessions",
4747
"ena_study__title",
4848
"ena_study__accession",
4949
"ena_study__additional_accessions",
@@ -56,7 +56,7 @@ class StudyFilterForAssembly(StudyFilter):
5656
"reads_study__ena_study__additional_accessions",
5757
"ena_accessions",
5858
]
59-
autocomplete_fields = ["ena_study", "reads_study", "assembly_study", "run"]
59+
autocomplete_fields = ["ena_study", "reads_study", "assembly_study", "runs"]
6060
readonly_fields = ["created_at", "updated_at"]
6161

6262
def status_summary(self, obj):
@@ -76,7 +76,7 @@ def status_summary(self, obj):
7676
"Reads",
7777
{
7878
"classes": ["tab"],
79-
"fields": ["reads_study", "run"],
79+
"fields": ["reads_study", "runs", "sample"],
8080
},
8181
),
8282
(

analyses/admin/sample.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,19 @@
22
from unfold.admin import ModelAdmin
33
from unfold.decorators import display
44

5-
from analyses.admin.base import ENABrowserLinkMixin, JSONFieldWidgetOverridesMixin
6-
from analyses.models import Sample
5+
from analyses.admin.base import (
6+
ENABrowserLinkMixin,
7+
JSONFieldWidgetOverridesMixin,
8+
TabularInlinePaginatedWithTabSupport,
9+
)
10+
from analyses.models import Sample, SampleRelatedSample
11+
12+
13+
class SampleRelatedSamplesInline(TabularInlinePaginatedWithTabSupport):
14+
model = SampleRelatedSample
15+
autocomplete_fields = ["declaring_sample", "related_sample"]
16+
fk_name = "declaring_sample"
17+
extra = 0
718

819

920
@admin.register(Sample)
@@ -17,6 +28,7 @@ class SampleAdmin(ENABrowserLinkMixin, JSONFieldWidgetOverridesMixin, ModelAdmin
1728
autocomplete_fields = ["ena_sample", "ena_study", "studies"]
1829
list_display = ["first_accession", "updated_at", "display_accessions"]
1930
list_filter = ["updated_at", "created_at", "is_private"]
31+
inlines = [SampleRelatedSamplesInline]
2032

2133
@display(description="ENA Accessions", label=True)
2234
def display_accessions(self, instance: Sample):

analyses/admin/study.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,8 @@ class StudyAssembliesInline(TabularInlinePaginatedWithTabSupport):
6161
verbose_name = "Assembly in this study"
6262
verbose_name_plural = "Assemblies in this study"
6363
show_change_link = True
64-
fields = ["run", "status", "dir"]
65-
readonly_fields = ["run"]
64+
fields = ["runs", "status", "dir"]
65+
readonly_fields = ["runs"]
6666
max_num = 0
6767
fk_name = "assembly_study"
6868
formfield_overrides = {
@@ -87,8 +87,8 @@ class StudyReadsInline(TabularInlinePaginatedWithTabSupport):
8787
verbose_name = "Assembly of this studies' read"
8888
verbose_name_plural = "Assemblies of this studies' reads"
8989
show_change_link = True
90-
fields = ["run", "status", "dir"]
91-
readonly_fields = ["run"]
90+
fields = ["runs", "status", "dir"]
91+
readonly_fields = ["runs"]
9292
max_num = 0
9393
fk_name = "reads_study"
9494
formfield_overrides = {

analyses/fixtures/assembly/conftest.py

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,32 +20,34 @@ def mgnify_assemblies(raw_read_run, raw_reads_mgnify_study, assemblers):
2020
]
2121
assembly_objects = []
2222
# create metaspades assemblies
23-
for run in assembleable_runs:
24-
assembly_obj, _ = mg_models.Assembly.objects.get_or_create(
25-
run=run,
26-
reads_study=raw_reads_mgnify_study,
27-
ena_study=raw_reads_mgnify_study.ena_study,
28-
assembler=assembler_metaspades,
29-
dir="slurm-dev-environment/fs/hps/tests/assembly_uploader",
30-
metadata={"coverage": 20},
31-
sample=run.sample,
32-
ena_accessions=[
33-
"ERZ857107",
34-
],
23+
for i, run in enumerate(assembleable_runs):
24+
assembly_obj, created = (
25+
mg_models.Assembly.objects.get_or_create_for_run_and_sample(
26+
run=run,
27+
sample=run.sample,
28+
reads_study=raw_reads_mgnify_study,
29+
ena_study=raw_reads_mgnify_study.ena_study,
30+
assembler=assembler_metaspades,
31+
dir="slurm-dev-environment/fs/hps/tests/assembly_uploader",
32+
metadata={"coverage": 20},
33+
ena_accessions=[
34+
f"ERZ_METASPADES_{i}",
35+
],
36+
)
3537
)
3638
assembly_objects.append(assembly_obj)
3739

3840
# create one megahit assembly
39-
for run in assembleable_runs[:1]:
40-
assembly, _ = mg_models.Assembly.objects.get_or_create(
41+
for i, run in enumerate(assembleable_runs[:1]):
42+
assembly, created = mg_models.Assembly.objects.get_or_create_for_run_and_sample(
4143
run=run,
44+
sample=run.sample,
4245
reads_study=raw_reads_mgnify_study,
4346
ena_study=raw_reads_mgnify_study.ena_study,
4447
assembler=assembler_megahit,
4548
dir="/hps/tests/assembly_uploader",
4649
metadata={"coverage": 10},
47-
sample=run.sample,
48-
ena_accessions=["ERZ857108"],
50+
ena_accessions=[f"ERZ_MEGAHIT_{i}"],
4951
)
5052
assembly_objects.append(assembly)
5153
return assembly_objects
@@ -55,7 +57,7 @@ def mgnify_assemblies(raw_read_run, raw_reads_mgnify_study, assemblers):
5557
def mgnify_assemblies_completed(mgnify_assemblies):
5658
run_accession = "SRR6180434"
5759
metaspades_assemblies = mg_models.Assembly.objects.filter(
58-
assembler__name="metaspades", run__ena_accessions__contains=[run_accession]
60+
assembler__name="metaspades", runs__ena_accessions__contains=[run_accession]
5961
)
6062
for item in metaspades_assemblies:
6163
item.mark_status("assembly_started")
@@ -67,7 +69,7 @@ def mgnify_assemblies_completed(mgnify_assemblies):
6769
def mgnify_assembly_completed_uploader_sanity_check(mgnify_assemblies):
6870
run_accession = "SRR6180435"
6971
metaspades_assemblies = mg_models.Assembly.objects.filter(
70-
assembler__name="metaspades", run__ena_accessions__contains=[run_accession]
72+
assembler__name="metaspades", runs__ena_accessions__contains=[run_accession]
7173
)
7274
for item in metaspades_assemblies:
7375
item.mark_status("assembly_completed")
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Generated by Django 5.2.7 on 2026-01-22 14:37
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
("analyses", "0052_update_download_groups_to_pathways_and_systems"),
10+
]
11+
12+
operations = [
13+
migrations.AddField(
14+
model_name="assembly",
15+
name="runs",
16+
field=models.ManyToManyField(
17+
blank=True, related_name="assemblies_m2m", to="analyses.run"
18+
),
19+
),
20+
]

0 commit comments

Comments
 (0)