Skip to content

Commit 9dae7ac

Browse files
authored
Merge branch 'release-2024.4.2' into bugfix/bencap/329/editable-dataset-metadata
2 parents a59011f + 97deb21 commit 9dae7ac

File tree

9 files changed

+450
-251
lines changed

9 files changed

+450
-251
lines changed
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import sqlalchemy as sa
2+
from sqlalchemy.orm import Session, configure_mappers
3+
4+
from mavedb.models import *
5+
6+
from mavedb.lib.score_sets import refresh_variant_urns
7+
8+
from mavedb.models.score_set import ScoreSet
9+
from mavedb.models.variant import Variant
10+
11+
from mavedb.db.session import SessionLocal
12+
13+
# Finalize ORM mapper configuration now that the model modules have been imported above.
# NOTE(review): presumably done eagerly so mapping/relationship errors surface before the
# migration touches the database -- confirm.
configure_mappers()
14+
15+
16+
def do_migration(db: Session):
    """Refresh variant URNs for published score sets that still have temporary variant URNs.

    Selects the distinct URNs of every score set that has a publication date and at
    least one associated variant whose URN matches ``%tmp:%``, then calls
    ``refresh_variant_urns`` on each of those score sets.

    Args:
        db: Session used for the lookup queries and handed to ``refresh_variant_urns``.
    """
    # Read every URN before starting the loop: refresh_variant_urns commits the session,
    # so we avoid pulling further rows from a result whose transaction has already ended.
    published_score_sets_with_associated_tmp_variants: list[str] = list(
        db.execute(
            sa.select(sa.distinct(ScoreSet.urn))
            .join(Variant)
            .where(ScoreSet.published_date.is_not(None), Variant.urn.like("%tmp:%"))
        )
        .scalars()
        .all()
    )

    for score_set_urn in published_score_sets_with_associated_tmp_variants:
        # scalar_one() raises if the URN no longer resolves to exactly one score set.
        score_set = db.execute(sa.select(ScoreSet).where(ScoreSet.urn == score_set_urn)).scalar_one()
        refresh_variant_urns(db, score_set)
24+
25+
26+
if __name__ == "__main__":
    # Script entry point: open a session, run the migration, and commit the result.
    db = SessionLocal()
    db.current_user = None  # type: ignore

    try:
        do_migration(db)
        db.commit()
    except Exception:
        # Discard any pending, uncommitted changes before propagating the failure.
        db.rollback()
        raise
    finally:
        # Always release the session, even when the migration fails.
        db.close()

poetry.lock

Lines changed: 307 additions & 248 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ python = "^3.9"
2626

2727
fqfa = "~1.3.0"
2828
pyhumps = "~3.8.0"
29-
pyyaml = "~5.1"
29+
pyyaml = "~6.0.1"
3030
IDUtils = "~1.2.0"
3131
mavehgvs = "~0.6.0"
3232
eutils = "~0.6.0"

src/mavedb/lib/experiments.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ def search_experiments(
9999
)
100100
)
101101

102-
items: list[Experiment] = query.order_by(Experiment.title).all()
102+
items: list[Experiment] = query.order_by(Experiment.urn, Experiment.title).all()
103103
if not items:
104104
items = []
105105

src/mavedb/lib/score_sets.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -617,6 +617,21 @@ def create_variants(db, score_set: ScoreSet, variants_data: list[VariantData], b
617617
return len(score_set.variants)
618618

619619

620+
def refresh_variant_urns(db: Session, score_set: ScoreSet) -> None:
    """Rewrite the URN of every variant in a score set to use the score set's current URN.

    Each variant URN is expected to look like ``<score set urn>#<variant number>``. The
    part after the ``#`` is kept and the prefix is replaced with ``score_set.urn``. The
    session is committed once all variants have been updated.

    Args:
        db: Session used to load the variants and persist the new URNs.
        score_set: Score set whose variants are refreshed; its ``id`` selects the
            variants and its ``urn`` becomes the new URN prefix.

    Raises:
        ValueError: If a variant has no URN, or its URN has no ``#``-separated
            variant number. Nothing is modified in that case.
    """
    variants = db.execute(select(Variant).where(Variant.score_set_id == score_set.id)).scalars().all()

    # Validate every URN before touching any variant so that a bad record cannot leave
    # the session holding a half-refreshed score set.
    variant_numbers = []
    for variant in variants:
        if not variant.urn:
            raise ValueError("All variants should have an associated URN.")

        urn_parts = variant.urn.split("#")
        if len(urn_parts) < 2:
            raise ValueError(f"Variant URN '{variant.urn}' does not contain a variant number.")
        variant_numbers.append(urn_parts[1])

    for variant, variant_number in zip(variants, variant_numbers):
        variant.urn = f"{score_set.urn}#{variant_number}"
        db.add(variant)

    db.commit()
633+
634+
620635
def bulk_create_urns(n, score_set, reset_counter=False) -> list[str]:
621636
start_value = 0 if reset_counter else score_set.num_variants
622637
parent_urn = score_set.urn

src/mavedb/routers/score_sets.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
)
4040
from mavedb.lib.score_sets import (
4141
search_score_sets as _search_score_sets,
42+
refresh_variant_urns,
4243
)
4344
from mavedb.lib.taxonomies import find_or_create_taxonomy
4445
from mavedb.lib.urns import (
@@ -333,6 +334,10 @@ async def create_score_set(
333334
msg="Failed to create score set; The requested experiment does not exist.", extra=logging_context()
334335
)
335336
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Unknown experiment")
337+
# Do not allow score sets to be added to meta-analysis experiments.
338+
if any(s.meta_analyzes_score_sets for s in experiment.score_sets):
339+
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN,
340+
detail="Score sets may not be added to a meta-analysis experiment.")
336341

337342
save_to_logging_context({"experiment": experiment.urn})
338343
assert_permission(user_data, experiment, Action.ADD_SCORE_SET)
@@ -385,7 +390,7 @@ async def create_score_set(
385390
)
386391

387392
if len(meta_analyzes_score_sets) > 0:
388-
# If any existing score set is a meta-analysis for score sets in the same collection of exepriment sets, use its
393+
# If any existing score set is a meta-analysis for score sets in the same collection of experiment sets, use its
389394
# experiment as the parent of our new meta-analysis. Otherwise, create a new experiment.
390395
meta_analyzes_experiment_sets = list(
391396
set(
@@ -1006,6 +1011,7 @@ def publish_score_set(
10061011
item.urn = generate_score_set_urn(db, item.experiment)
10071012
item.private = False
10081013
item.published_date = published_date
1014+
refresh_variant_urns(db, item)
10091015

10101016
save_to_logging_context({"score_set": item.urn})
10111017

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
hgvs_nt,hgvs_pro,c_0,c_1
2+
c.1A>T,p.Thr1Ser,10,20
3+
c.2C>T,p.Thr1Met,8,8
4+
c.6T>A,p.Phe2Leu,90,2
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
hgvs_nt,hgvs_pro,score
2+
c.1A>T,p.Thr1Ser,0.3
3+
c.2C>T,p.Thr1Met,0
4+
c.6T>A,p.Phe2Leu,-1.65

tests/routers/test_score_set.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,13 @@
77
import pytest
88
from arq import ArqRedis
99
from humps import camelize
10+
from sqlalchemy import select
1011

1112
from mavedb.lib.validation.urn_re import MAVEDB_TMP_URN_RE, MAVEDB_SCORE_SET_URN_RE, MAVEDB_EXPERIMENT_URN_RE
1213
from mavedb.models.enums.processing_state import ProcessingState
1314
from mavedb.models.experiment import Experiment as ExperimentDbModel
1415
from mavedb.models.score_set import ScoreSet as ScoreSetDbModel
16+
from mavedb.models.variant import Variant as VariantDbModel
1517
from mavedb.view_models.orcid import OrcidUser
1618
from mavedb.view_models.score_set import ScoreSet, ScoreSetCreate
1719
from tests.helpers.constants import (
@@ -531,6 +533,59 @@ def test_add_score_set_variants_scores_and_counts_endpoint(session, client, setu
531533
assert score_set == response_data
532534

533535

536+
def test_add_score_set_variants_scores_only_endpoint_utf8_encoded(client, setup_router_db, data_files):
    """Uploading only the UTF-8 encoded scores CSV is accepted and queues exactly one job."""
    experiment = create_experiment(client)
    score_set = create_seq_score_set(client, experiment["urn"])

    scores_csv_path = data_files / "scores_utf8_encoded.csv"
    upload_url = f"/api/v1/score-sets/{score_set['urn']}/variants/data"

    with open(scores_csv_path, "rb") as scores_file, patch.object(
        ArqRedis, "enqueue_job", return_value=None
    ) as queue:
        upload = {"scores_file": (scores_csv_path.name, scores_file, "text/csv")}
        response = client.post(upload_url, files=upload)
        queue.assert_called_once()

    assert response.status_code == 200
    response_data = response.json()
    jsonschema.validate(instance=response_data, schema=ScoreSet.schema())

    # The worker that actually ingests the variant data is tested separately, so here we
    # simply assume it would have succeeded and expect the "processing" state.
    score_set["processingState"] = "processing"
    assert score_set == response_data
558+
559+
560+
def test_add_score_set_variants_scores_and_counts_endpoint_utf8_encoded(session, client, setup_router_db, data_files):
    """Uploading UTF-8 encoded scores and counts CSVs together is accepted and queues exactly one job."""
    experiment = create_experiment(client)
    score_set = create_seq_score_set(client, experiment["urn"])

    scores_csv_path = data_files / "scores_utf8_encoded.csv"
    counts_csv_path = data_files / "counts_utf8_encoded.csv"
    upload_url = f"/api/v1/score-sets/{score_set['urn']}/variants/data"

    with open(scores_csv_path, "rb") as scores_file, open(counts_csv_path, "rb") as counts_file, patch.object(
        ArqRedis, "enqueue_job", return_value=None
    ) as queue:
        upload = {
            "scores_file": (scores_csv_path.name, scores_file, "text/csv"),
            "counts_file": (counts_csv_path.name, counts_file, "text/csv"),
        }
        response = client.post(upload_url, files=upload)
        queue.assert_called_once()

    assert response.status_code == 200
    response_data = response.json()
    jsonschema.validate(instance=response_data, schema=ScoreSet.schema())

    # The worker that actually ingests the variant data is tested separately, so here we
    # simply assume it would have succeeded and expect the "processing" state.
    score_set["processingState"] = "processing"
    assert score_set == response_data
587+
588+
534589
def test_cannot_add_scores_to_score_set_without_email(session, client, setup_router_db, data_files):
535590
experiment = create_experiment(client)
536591
score_set = create_seq_score_set(client, experiment["urn"])
@@ -788,6 +843,11 @@ def test_publish_score_set(session, data_provider, client, setup_router_db, data
788843
for key in expected_response:
789844
assert (key, expected_response[key]) == (key, score_set[key])
790845

846+
score_set_variants = session.execute(
847+
select(VariantDbModel).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set["urn"])
848+
).scalars()
849+
assert all([variant.urn.startswith("urn:mavedb:") for variant in score_set_variants])
850+
791851

792852
def test_publish_multiple_score_sets(session, data_provider, client, setup_router_db, data_files):
793853
experiment = create_experiment(client)
@@ -820,6 +880,19 @@ def test_publish_multiple_score_sets(session, data_provider, client, setup_route
820880
assert pub_score_set_3_data["title"] == score_set_3["title"]
821881
assert pub_score_set_3_data["experiment"]["urn"] == "urn:mavedb:00000001-a"
822882

883+
score_set_1_variants = session.execute(
884+
select(VariantDbModel).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_1["urn"])
885+
).scalars()
886+
assert all([variant.urn.startswith("urn:mavedb:") for variant in score_set_1_variants])
887+
score_set_2_variants = session.execute(
888+
select(VariantDbModel).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_2["urn"])
889+
).scalars()
890+
assert all([variant.urn.startswith("urn:mavedb:") for variant in score_set_2_variants])
891+
score_set_3_variants = session.execute(
892+
select(VariantDbModel).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_3["urn"])
893+
).scalars()
894+
assert all([variant.urn.startswith("urn:mavedb:") for variant in score_set_3_variants])
895+
823896

824897
def test_cannot_publish_score_set_without_variants(client, setup_router_db):
825898
experiment = create_experiment(client)
@@ -917,6 +990,11 @@ def test_contributor_can_publish_other_users_score_set(session, data_provider, c
917990
for key in expected_response:
918991
assert (key, expected_response[key]) == (key, score_set[key])
919992

993+
score_set_variants = session.execute(
994+
select(VariantDbModel).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set["urn"])
995+
).scalars()
996+
assert all([variant.urn.startswith("urn:mavedb:") for variant in score_set_variants])
997+
920998

921999
def test_admin_cannot_publish_other_user_private_score_set(
9221000
session, data_provider, client, admin_app_overrides, setup_router_db, data_files

0 commit comments

Comments
 (0)