Skip to content

Commit 30fa9ed

Browse files
authored
Merge pull request #70 from VariantEffect/experiments-for-meta-analyses
2 parents 4275bcb + a24ec74 commit 30fa9ed

File tree

3 files changed

+82
-9
lines changed

3 files changed

+82
-9
lines changed

src/mavedb/lib/score_sets.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import pandas as pd
55
from pandas.testing import assert_index_equal
66
from sqlalchemy import func, or_
7-
from sqlalchemy.orm import Session
7+
from sqlalchemy.orm import Session, aliased
88

99
from mavedb.lib.array_comparison import assert_array_equal
1010
from mavedb.lib.exceptions import ValidationError
@@ -79,6 +79,36 @@ def search_score_sets(db: Session, owner: Optional[User], search: ScoreSetsSearc
7979
return score_sets # filter_visible_score_sets(score_sets)
8080

8181

82+
def find_meta_analyses_for_score_sets(db: Session, urns: list[str]) -> list[ScoreSet]:
    """
    Find all score sets that are meta-analyses for exactly a specified collection of other score sets.

    :param db: An active database session.
    :param urns: A list of score set URNs. Repeated URNs are counted once.
    :return: A list of the score sets that are meta-analyses for exactly the collection of score sets specified by
      urns. The list is empty if there is no such meta-analysis.
    """
    # Ensure that URNs are not repeated in the list. dict.fromkeys() de-duplicates while keeping the caller's order, so
    # the filters below are always generated in the same order for the same input.
    urns = list(dict.fromkeys(urns))

    # The inner join below only yields meta-analyses with at least one source score set, so nothing can match an empty
    # collection of URNs; skip the query entirely.
    if not urns:
        return []

    # Find all score sets that are meta-analyses for a superset of the specified URNs and are meta-analyses for
    # exactly len(urns) score sets. Each URN gets its own alias of ScoreSet so that every "has a source score set with
    # this URN" condition is expressed against a separate alias.
    urn_filters = []
    for urn in urns:
        source_score_set = aliased(ScoreSet)
        urn_filters.append(
            ScoreSet.meta_analysis_source_score_sets.of_type(source_score_set).any(source_score_set.urn == urn)
        )

    # A further alias is joined in only to count how many source score sets each candidate meta-analysis has.
    analyzed_score_set = aliased(ScoreSet)
    return (
        db.query(ScoreSet)
        .join(ScoreSet.meta_analysis_source_score_sets.of_type(analyzed_score_set))
        .filter(*urn_filters)
        .group_by(ScoreSet)
        .having(func.count(analyzed_score_set.id) == len(urns))
        .all()
    )
110+
111+
82112
def filter_visible_score_sets(items: list[ScoreSet]):
    """
    Lazily select the score sets that are not private.

    :param items: The score sets to filter. A falsy value (None or an empty list) is treated as an empty list.
    :return: A filter iterator over the items whose private attribute is falsy.
    """

    # TODO Take the user into account.
    def is_visible(score_set):
        return not score_set.private

    candidates = items or []
    return filter(is_visible, candidates)

src/mavedb/routers/score_sets.py

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,12 @@
2222
find_or_create_doi_identifier,
2323
find_or_create_publication_identifier,
2424
)
25-
from mavedb.lib.score_sets import create_variants_data, search_score_sets as _search_score_sets, VariantData
25+
from mavedb.lib.score_sets import (
26+
create_variants_data,
27+
search_score_sets as _search_score_sets,
28+
VariantData,
29+
find_meta_analyses_for_score_sets,
30+
)
2631
from mavedb.lib.urns import generate_experiment_set_urn, generate_experiment_urn, generate_score_set_urn
2732
from mavedb.lib.validation import exceptions
2833
from mavedb.lib.validation.constants.general import null_values_list
@@ -100,7 +105,9 @@ def search_my_score_sets(
100105

101106

102107
@router.get("/score-sets/{urn}", status_code=200, response_model=score_set.ScoreSet, responses={404: {}, 500: {}})
103-
async def show_score_set(*, urn: str, db: Session = Depends(deps.get_db), user: User = Depends(get_current_user)) -> Any:
108+
async def show_score_set(
109+
*, urn: str, db: Session = Depends(deps.get_db), user: User = Depends(get_current_user)
110+
) -> Any:
104111
"""
105112
Fetch a single score set by URN.
106113
"""
@@ -198,9 +205,30 @@ async def create_score_set(
198205
if item_create is None:
199206
return None
200207

201-
experiment = db.query(Experiment).filter(Experiment.urn == item_create.experiment_urn).one_or_none()
202-
if not experiment:
203-
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Unknown experiment")
208+
experiment: Experiment = None
209+
if item_create.experiment_urn is not None:
210+
experiment = db.query(Experiment).filter(Experiment.urn == item_create.experiment_urn).one_or_none()
211+
if not experiment:
212+
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Unknown experiment")
213+
if (
214+
item_create.meta_analysis_source_score_set_urns is not None
215+
and len(item_create.meta_analysis_source_score_set_urns) > 0
216+
):
217+
# If any existing score set is a meta-analysis for the same set of score sets, use its experiment as the parent
218+
# of our new meta-analysis. Otherwise, create a new experiment.
219+
existing_meta_analyses = find_meta_analyses_for_score_sets(db, item_create.meta_analysis_source_score_set_urns)
220+
if len(existing_meta_analyses) > 0:
221+
experiment = existing_meta_analyses[0].experiment
222+
else:
223+
experiment = Experiment(
224+
title=item_create.title,
225+
short_description=item_create.short_description,
226+
abstract_text=item_create.abstract_text,
227+
method_text=item_create.method_text,
228+
extra_metadata={},
229+
created_by=user,
230+
modified_by=user,
231+
)
204232

205233
license_ = db.query(License).filter(License.id == item_create.license_id).one_or_none()
206234
if not license_:
@@ -390,7 +418,9 @@ async def upload_score_set_variant_data(
390418
def create_variants(db, score_set: ScoreSet, variants_data: list[VariantData], batch_size=None) -> int:
    """
    Create one Variant per entry of variants_data for a score set and save them in bulk.

    :param db: The database session used to save the new variants and to add the score set.
    :param score_set: The score set that the new variants belong to.
    :param variants_data: One mapping of Variant keyword arguments per variant to create.
    :param batch_size: Accepted for callers that pass it, but not used by this function.
    :return: The number of variants on score_set, as reported by len(score_set.variants) after saving.
    """
    # One URN is requested per variant; the meaning of the trailing True flag is defined by bulk_create_urns.
    urns = bulk_create_urns(len(variants_data), score_set, True)

    # Generator: the Variant objects are only constructed as bulk_save_objects consumes them.
    new_variants = (
        Variant(urn=variant_urn, score_set_id=score_set.id, **fields)
        for variant_urn, fields in zip(urns, variants_data)
    )
    db.bulk_save_objects(new_variants)
    db.add(score_set)
    return len(score_set.variants)

src/mavedb/view_models/score_set.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@
44
from datetime import date
55
from typing import Collection, Dict, Optional
66

7-
from pydantic import Field
7+
from pydantic import Field, root_validator
88

99
from mavedb.lib.validation import keywords, urn
10+
from mavedb.lib.validation.exceptions import ValidationError
1011
from mavedb.view_models import PublicationIdentifiersGetter
1112
from mavedb.view_models.base.base import BaseModel, validator
1213
from mavedb.view_models.doi_identifier import (
@@ -53,7 +54,7 @@ def validate_keywords(cls, v):
5354
class ScoreSetCreate(ScoreSetModify):
5455
"""View model for creating a new score set."""
5556

56-
experiment_urn: str
57+
experiment_urn: Optional[str]
5758
license_id: int
5859
superseded_score_set_urn: Optional[str]
5960
meta_analysis_source_score_set_urns: Optional[list[str]]
@@ -79,6 +80,18 @@ def validate_experiment_urn(cls, v):
7980
urn.validate_mavedb_urn_experiment(v)
8081
return v
8182

83+
@root_validator
def validate_experiment_urn_required_except_for_meta_analyses(cls, values):
    """
    Require an experiment URN for ordinary score sets and reject one for meta-analyses.

    A score set counts as a meta-analysis when meta_analysis_source_score_set_urns is a non-empty list.

    :param values: The field values collected for the model so far.
    :return: The unchanged values, when the combination of the two fields is acceptable.
    :raises ValidationError: If an ordinary score set has no experiment URN, or a meta-analysis has one.
    """
    # A field that failed its own validation is left out of values. Its error has already been recorded, so skip
    # the cross-field check instead of failing with a KeyError on the missing key.
    if "experiment_urn" not in values or "meta_analysis_source_score_set_urns" not in values:
        return values

    experiment_urn = values["experiment_urn"]
    # None and an empty list both mean "not a meta-analysis".
    is_meta_analysis = bool(values["meta_analysis_source_score_set_urns"])

    if experiment_urn is None and not is_meta_analysis:
        raise ValidationError("An experiment URN is required, unless your score set is a meta-analysis.")
    if experiment_urn is not None and is_meta_analysis:
        raise ValidationError("An experiment URN should not be supplied when your score set is a meta-analysis.")
    return values
93+
94+
8295
class ScoreSetUpdate(ScoreSetModify):
8396
"""View model for updating a score set."""
8497

0 commit comments

Comments
 (0)