diff --git a/pyproject.toml b/pyproject.toml index 2a7d8476..4eefc7a1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "mavedb" -version = "2024.4.3" +version = "2025.0.0" description = "API for MaveDB, the database of Multiplexed Assays of Variant Effect." license = "AGPL-3.0-only" readme = "README.md" diff --git a/src/mavedb/__init__.py b/src/mavedb/__init__.py index e01dd0ef..d0a55f5c 100644 --- a/src/mavedb/__init__.py +++ b/src/mavedb/__init__.py @@ -6,6 +6,6 @@ logger = module_logging.getLogger(__name__) __project__ = "mavedb-api" -__version__ = "2024.4.3" +__version__ = "2025.0.0" logger.info(f"MaveDB {__version__}") diff --git a/src/mavedb/lib/experiments.py b/src/mavedb/lib/experiments.py index d771f26b..1dff7090 100644 --- a/src/mavedb/lib/experiments.py +++ b/src/mavedb/lib/experiments.py @@ -1,7 +1,7 @@ import logging from typing import Optional -from sqlalchemy import func, or_ +from sqlalchemy import func, or_, not_ from sqlalchemy.orm import Session from mavedb.lib.logging.context import logging_context, save_to_logging_context @@ -99,6 +99,19 @@ def search_experiments( ) ) + if search.meta_analysis is not None: + if not search.meta_analysis: + query = query.filter( + or_( + # Keep experiments without any score sets + not_(Experiment.score_sets.any()), + # Keep experiments where score sets exist but have no meta_analyzes_score_sets + Experiment.score_sets.any(not_(ScoreSet.meta_analyzes_score_sets.any())) + ) + ) + else: + query = query.filter(Experiment.score_sets.any(ScoreSet.meta_analyzes_score_sets.any())) + items: list[Experiment] = query.order_by(Experiment.urn, Experiment.title).all() if not items: items = [] diff --git a/src/mavedb/routers/experiments.py b/src/mavedb/routers/experiments.py index cea7209e..ec0d65e0 100644 --- a/src/mavedb/routers/experiments.py +++ b/src/mavedb/routers/experiments.py @@ -7,6 +7,7 @@ from fastapi import APIRouter, Depends, 
HTTPException from fastapi.encoders import jsonable_encoder from sqlalchemy.orm import Session +from sqlalchemy import or_ from mavedb import deps from mavedb.lib.authentication import UserData, get_current_user @@ -43,7 +44,7 @@ ) -# TODO: Rewrite this function. +# Nothing currently calls this function. Feel free to modify it if it is needed in the future. @router.get( "/experiments/", status_code=200, @@ -53,7 +54,6 @@ def list_experiments( *, editable: Optional[bool] = None, - q: Optional[str] = None, db: Session = Depends(deps.get_db), user_data: Optional[UserData] = Depends(get_current_user), ) -> list[Experiment]: @@ -61,22 +61,19 @@ def list_experiments( List experiments. """ query = db.query(Experiment) - if q is not None: - save_to_logging_context({"query_string": q}) + if editable: if user_data is None or user_data.user is None: logger.debug(msg="User is anonymous; Cannot list their experiments.", extra=logging_context()) return [] - if len(q) > 0: - logger.debug(msg="Listing experiments for the current user.", extra=logging_context()) - query = query.filter( - Experiment.created_by_id == user_data.user.id - ) # .filter(Experiment.published_date is None) - # else: - # query = query.filter(Experiment.created_by_id == user.id).filter(Experiment.published_date is None) - else: - logger.debug(msg="No query string was provided; Listing all experiments.", extra=logging_context()) + logger.debug(msg="Listing experiments for the current user.", extra=logging_context()) + query = query.filter( + or_( + Experiment.created_by_id == user_data.user.id, + Experiment.contributors.any(Contributor.orcid_id == user_data.user.username) + ) + ) items = query.order_by(Experiment.urn).all() return items diff --git a/src/mavedb/routers/hgvs.py b/src/mavedb/routers/hgvs.py index 57ceecad..87ee26e5 100644 --- a/src/mavedb/routers/hgvs.py +++ b/src/mavedb/routers/hgvs.py @@ -11,7 +11,7 @@ router = APIRouter( prefix="/api/v1/hgvs", - tags=["hgvs"], + tags=["transcripts"], 
responses={404: {"description": "Not found"}}, ) @@ -85,7 +85,7 @@ def gene_info(gene: str, hdp: RESTDataProvider = Depends(hgvs_data_provider)) -> return gene_info -@router.get("/transcripts/gene/{gene}", status_code=200, response_model=list[str]) +@router.get("/gene/{gene}", status_code=200, response_model=list[str]) def list_transcripts_for_gene(gene: str, hdp: RESTDataProvider = Depends(hgvs_data_provider)) -> list[str]: """ List transcripts associated with a particular gene @@ -98,7 +98,7 @@ def list_transcripts_for_gene(gene: str, hdp: RESTDataProvider = Depends(hgvs_da return list(transcripts) -@router.get("/transcripts/{transcript}", status_code=200, response_model=dict[str, Any]) +@router.get("/{transcript}", status_code=200, response_model=dict[str, Any]) def transcript_info(transcript: str, hdp: RESTDataProvider = Depends(hgvs_data_provider)) -> dict[str, Any]: """ List transcript information for a particular transcript @@ -111,7 +111,7 @@ def transcript_info(transcript: str, hdp: RESTDataProvider = Depends(hgvs_data_p return transcript_info -@router.get("/transcripts/protein/{transcript}", status_code=200, response_model=str) +@router.get("/protein/{transcript}", status_code=200, response_model=str) def convert_to_protein(transcript: str, hdp: RESTDataProvider = Depends(hgvs_data_provider)) -> str: """ Convert a provided transcript from it's nucleotide accession identifier to its protein accession identifier diff --git a/src/mavedb/view_models/search.py b/src/mavedb/view_models/search.py index ed7f597c..307c1440 100644 --- a/src/mavedb/view_models/search.py +++ b/src/mavedb/view_models/search.py @@ -11,6 +11,7 @@ class ExperimentsSearch(BaseModel): publication_identifiers: Optional[list[str]] keywords: Optional[list[str]] text: Optional[str] + meta_analysis: Optional[bool] class ScoreSetsSearch(BaseModel): diff --git a/tests/routers/test_experiments.py b/tests/routers/test_experiments.py index 51dde99e..5b864e9c 100644 --- 
a/tests/routers/test_experiments.py +++ b/tests/routers/test_experiments.py @@ -973,6 +973,58 @@ def test_search_my_experiments(session, client, setup_router_db): assert response.json()[0]["title"] == experiment["title"] +def test_search_meta_analysis_experiment(session, data_provider, client, setup_router_db, data_files): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_variants( + client, session, data_provider, experiment["urn"], data_files / "scores.csv" + ) + + score_set = (client.post(f"/api/v1/score-sets/{score_set['urn']}/publish")).json() + meta_score_set = create_seq_score_set_with_variants( + client, + session, + data_provider, + None, + data_files / "scores.csv", + update={"title": "Test Meta Analysis", "metaAnalyzesScoreSetUrns": [score_set["urn"]]}, + ) + + meta_score_set = (client.post(f"/api/v1/score-sets/{meta_score_set['urn']}/publish")).json() + score_set_refresh = (client.get(f"/api/v1/score-sets/{score_set['urn']}")).json() + search_payload = {"metaAnalysis": True} + response = client.post("/api/v1/me/experiments/search", json=search_payload) + assert response.status_code == 200 + response_data = response.json() + assert any(item["urn"] == meta_score_set["experiment"]["urn"] for item in response_data) + assert all(item["urn"] != score_set_refresh["experiment"]["urn"] for item in response_data) + + +def test_search_exclude_meta_analysis_experiment(session, data_provider, client, setup_router_db, data_files): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_variants( + client, session, data_provider, experiment["urn"], data_files / "scores.csv" + ) + + score_set = (client.post(f"/api/v1/score-sets/{score_set['urn']}/publish")).json() + meta_score_set = create_seq_score_set_with_variants( + client, + session, + data_provider, + None, + data_files / "scores.csv", + update={"title": "Test Meta Analysis", "metaAnalyzesScoreSetUrns": [score_set["urn"]]}, + ) + + meta_score_set = 
(client.post(f"/api/v1/score-sets/{meta_score_set['urn']}/publish")).json() + score_set_refresh = (client.get(f"/api/v1/score-sets/{score_set['urn']}")).json() + search_payload = {"metaAnalysis": False} + response = client.post("/api/v1/me/experiments/search", json=search_payload) + assert response.status_code == 200 + response_data = response.json() + assert any(item["urn"] == score_set_refresh["experiment"]["urn"] for item in response_data) + assert all(item["urn"] != meta_score_set["experiment"]["urn"] for item in response_data) + + def test_search_score_sets_for_experiments(session, client, setup_router_db, data_files, data_provider): experiment = create_experiment(client) score_set_pub = create_seq_score_set_with_variants( diff --git a/tests/routers/test_hgvs.py b/tests/routers/test_hgvs.py index 092081bb..f59e5c27 100644 --- a/tests/routers/test_hgvs.py +++ b/tests/routers/test_hgvs.py @@ -122,7 +122,7 @@ def test_hgvs_gene_transcript_valid(client, setup_router_db): json={"results": [{"hgnc": f"{VALID_GENE}", "tx_ac": VALID_TRANSCRIPT}]}, ) - response = client.get(f"/api/v1/hgvs/transcripts/gene/{VALID_GENE}") + response = client.get(f"/api/v1/hgvs/gene/{VALID_GENE}") assert response.status_code == 200 assert VALID_TRANSCRIPT in response.json() @@ -131,7 +131,7 @@ def test_hgvs_gene_transcript_invalid(client, setup_router_db): with requests_mock.mock() as m: m.get(f"https://cdot.cc/transcripts/gene/{INVALID_GENE}", status_code=404) - response = client.get(f"/api/v1/hgvs/transcripts/gene/{INVALID_GENE}") + response = client.get(f"/api/v1/hgvs/gene/{INVALID_GENE}") assert m.called assert response.status_code == 404 @@ -139,7 +139,7 @@ def test_hgvs_gene_transcript_invalid(client, setup_router_db): def test_hgvs_transcript_valid(client, setup_router_db): with patch.object(cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT): - response = client.get(f"/api/v1/hgvs/transcripts/{VALID_TRANSCRIPT}") + response = 
client.get(f"/api/v1/hgvs/{VALID_TRANSCRIPT}") assert response.status_code == 200 assert response.json()["hgnc"] == VALID_GENE @@ -149,7 +149,7 @@ def test_hgvs_transcript_invalid(client, setup_router_db): with requests_mock.mock() as m: m.get(f"https://cdot.cc/transcript/{INVALID_TRANSCRIPT}", status_code=404) - response = client.get(f"/api/v1/hgvs/transcripts/{INVALID_TRANSCRIPT}") + response = client.get(f"/api/v1/hgvs/{INVALID_TRANSCRIPT}") assert m.called assert response.status_code == 404 @@ -163,7 +163,7 @@ def test_hgvs_transcript_protein_valid(client, setup_router_db): json={"biotype": ["protein_coding"], "gene_name": "A2M", "gene_vesion": "2", "protein": "NP_000005.2"}, ) - response = client.get(f"/api/v1/hgvs/transcripts/protein/{HAS_PROTEIN_ACCESSION}") + response = client.get(f"/api/v1/hgvs/protein/{HAS_PROTEIN_ACCESSION}") assert m.called @@ -175,7 +175,7 @@ def test_hgvs_transcript_protein_no_protein(client, setup_router_db): with requests_mock.mock() as m: m.get(f"https://cdot.cc/transcript/{SMALL_ACCESSION}", status_code=404) - response = client.get(f"/api/v1/hgvs/transcripts/protein/{SMALL_ACCESSION}") + response = client.get(f"/api/v1/hgvs/protein/{SMALL_ACCESSION}") assert m.called assert response.status_code == 404 @@ -185,7 +185,7 @@ def test_hgvs_transcript_protein_invalid(client, setup_router_db): with requests_mock.mock() as m: m.get(f"https://cdot.cc/transcript/{INVALID_ACCESSION}", status_code=404) - response = client.get(f"/api/v1/hgvs/transcripts/protein/{INVALID_ACCESSION}") + response = client.get(f"/api/v1/hgvs/protein/{INVALID_ACCESSION}") assert m.called assert response.status_code == 404 diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index 1b64683f..c0d598fe 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -1071,6 +1071,7 @@ def test_publish_single_score_set_meta_analysis(session, data_provider, client, ) meta_score_set = 
(client.post(f"/api/v1/score-sets/{meta_score_set['urn']}/publish")).json() + assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(meta_score_set["urn"]), re.Match) assert meta_score_set["urn"] == "urn:mavedb:00000001-0-1" @@ -1101,6 +1102,7 @@ def test_multiple_score_set_meta_analysis_single_experiment( assert score_set_1_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] meta_score_set = (client.post(f"/api/v1/score-sets/{meta_score_set['urn']}/publish")).json() + assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(meta_score_set["urn"]), re.Match) assert meta_score_set["urn"] == "urn:mavedb:00000001-0-1" @@ -1132,6 +1134,7 @@ def test_multiple_score_set_meta_analysis_multiple_experiment_sets( assert score_set_1_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] meta_score_set = (client.post(f"/api/v1/score-sets/{meta_score_set['urn']}/publish")).json() + assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(meta_score_set["urn"]), re.Match) assert meta_score_set["urn"] == "urn:mavedb:00000003-0-1" @@ -1165,6 +1168,7 @@ def test_multiple_score_set_meta_analysis_multiple_experiments( assert score_set_1_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] meta_score_set = (client.post(f"/api/v1/score-sets/{meta_score_set['urn']}/publish")).json() + assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(meta_score_set["urn"]), re.Match) assert meta_score_set["urn"] == "urn:mavedb:00000001-0-1" @@ -1254,8 +1258,126 @@ def test_multiple_score_set_meta_analysis_multiple_experiment_sets_different_sco assert meta_score_set_2["urn"] == "urn:mavedb:00000003-0-2" meta_score_set_3 = (client.post(f"/api/v1/score-sets/{meta_score_set_3['urn']}/publish")).json() assert meta_score_set_3["urn"] == "urn:mavedb:00000003-0-3" + assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(meta_score_set_1["urn"]), re.Match) + assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(meta_score_set_2["urn"]), re.Match) + assert 
isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(meta_score_set_3["urn"]), re.Match) +def test_cannot_add_score_set_to_meta_analysis_experiment(session, data_provider, client, setup_router_db, data_files): + experiment = create_experiment(client) + score_set_1 = create_seq_score_set_with_variants( + client, session, data_provider, experiment["urn"], data_files / "scores.csv" + ) + + score_set_1 = (client.post(f"/api/v1/score-sets/{score_set_1['urn']}/publish")).json() + meta_score_set_1 = create_seq_score_set_with_variants( + client, + session, + data_provider, + None, + data_files / "scores.csv", + update={"title": "Test Meta Analysis", "metaAnalyzesScoreSetUrns": [score_set_1["urn"]]}, + ) + + meta_score_set_1 = (client.post(f"/api/v1/score-sets/{meta_score_set_1['urn']}/publish")).json() + assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(meta_score_set_1["urn"]), re.Match) + score_set_2 = deepcopy(TEST_MINIMAL_SEQ_SCORESET) + score_set_2["experimentUrn"] = meta_score_set_1['experiment']['urn'] + jsonschema.validate(instance=score_set_2, schema=ScoreSetCreate.schema()) + + response = client.post("/api/v1/score-sets/", json=score_set_2) + response_data = response.json() + assert response.status_code == 403 + assert "Score sets may not be added to a meta-analysis experiment." 
in response_data["detail"] + + +def test_create_single_score_set_meta_analysis_to_others_score_set(session, data_provider, client, setup_router_db, data_files): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_variants( + client, session, data_provider, experiment["urn"], data_files / "scores.csv" + ) + + score_set = (client.post(f"/api/v1/score-sets/{score_set['urn']}/publish")).json() + change_ownership(session, score_set["urn"], ScoreSetDbModel) + meta_score_set = create_seq_score_set_with_variants( + client, + session, + data_provider, + None, + data_files / "scores.csv", + update={"title": "Test Meta Analysis", "metaAnalyzesScoreSetUrns": [score_set["urn"]]}, + ) + + score_set_refresh = (client.get(f"/api/v1/score-sets/{score_set['urn']}")).json() + assert meta_score_set["metaAnalyzesScoreSetUrns"] == [score_set["urn"]] + assert score_set_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] + assert isinstance(MAVEDB_TMP_URN_RE.fullmatch(meta_score_set["urn"]), re.Match) + + +def test_multiple_score_set_meta_analysis_single_experiment_with_different_creator( + session, data_provider, client, setup_router_db, data_files +): + experiment = create_experiment(client) + score_set_1 = create_seq_score_set_with_variants( + client, session, data_provider, experiment["urn"], data_files / "scores.csv", update={"title": "Score Set 1"} + ) + score_set_2 = create_seq_score_set_with_variants( + client, session, data_provider, experiment["urn"], data_files / "scores.csv", update={"title": "Score Set 2"} + ) + + score_set_1 = (client.post(f"/api/v1/score-sets/{score_set_1['urn']}/publish")).json() + score_set_2 = (client.post(f"/api/v1/score-sets/{score_set_2['urn']}/publish")).json() + + change_ownership(session, score_set_2["urn"], ScoreSetDbModel) + meta_score_set = create_seq_score_set_with_variants( + client, + session, + data_provider, + None, + data_files / "scores.csv", + update={"title": "Test Meta Analysis", 
"metaAnalyzesScoreSetUrns": [score_set_1["urn"], score_set_2["urn"]]}, + ) + score_set_1_refresh = (client.get(f"/api/v1/score-sets/{score_set_1['urn']}")).json() + assert meta_score_set["metaAnalyzesScoreSetUrns"] == sorted([score_set_1["urn"], score_set_2["urn"]]) + assert score_set_1_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] + + meta_score_set = (client.post(f"/api/v1/score-sets/{meta_score_set['urn']}/publish")).json() + assert meta_score_set["urn"] == "urn:mavedb:00000001-0-1" + assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(meta_score_set["urn"]), re.Match) + + +def test_multiple_score_set_meta_analysis_multiple_experiment_sets_with_different_creator( + session, data_provider, client, setup_router_db, data_files +): + experiment_1 = create_experiment(client, {"title": "Experiment 1"}) + experiment_2 = create_experiment(client, {"title": "Experiment 2"}) + score_set_1 = create_seq_score_set_with_variants( + client, session, data_provider, experiment_1["urn"], data_files / "scores.csv", update={"title": "Score Set 1"} + ) + score_set_2 = create_seq_score_set_with_variants( + client, session, data_provider, experiment_2["urn"], data_files / "scores.csv", update={"title": "Score Set 2"} + ) + + score_set_1 = (client.post(f"/api/v1/score-sets/{score_set_1['urn']}/publish")).json() + score_set_2 = (client.post(f"/api/v1/score-sets/{score_set_2['urn']}/publish")).json() + + change_ownership(session, score_set_2["urn"], ScoreSetDbModel) + meta_score_set = create_seq_score_set_with_variants( + client, + session, + data_provider, + None, + data_files / "scores.csv", + update={"title": "Test Meta Analysis", "metaAnalyzesScoreSetUrns": [score_set_1["urn"], score_set_2["urn"]]}, + ) + score_set_1_refresh = (client.get(f"/api/v1/score-sets/{score_set_1['urn']}")).json() + assert meta_score_set["metaAnalyzesScoreSetUrns"] == sorted([score_set_1["urn"], score_set_2["urn"]]) + assert score_set_1_refresh["metaAnalyzedByScoreSetUrns"] == 
[meta_score_set["urn"]] + + meta_score_set = (client.post(f"/api/v1/score-sets/{meta_score_set['urn']}/publish")).json() + assert meta_score_set["urn"] == "urn:mavedb:00000003-0-1" + assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(meta_score_set["urn"]), re.Match) + ######################################################################################################################## # Score set search ########################################################################################################################