Skip to content

Commit c95060d

Browse files
authored
Merge pull request #366 from VariantEffect/estelle/debugShowTmpSupersedingScoreSet
Fix current version bug
2 parents c267ede + 86d6e03 commit c95060d

File tree

6 files changed

+589
-31
lines changed

6 files changed

+589
-31
lines changed

src/mavedb/lib/score_sets.py

Lines changed: 95 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
import io
33
import logging
44
import re
5-
from typing import Any, BinaryIO, Iterable, Optional, Sequence
5+
from operator import attrgetter
6+
from typing import Any, BinaryIO, Iterable, Optional, TYPE_CHECKING, Sequence
67

78
import numpy as np
89
import pandas as pd
@@ -48,6 +49,10 @@
4849
from mavedb.models.variant import Variant
4950
from mavedb.view_models.search import ScoreSetsSearch
5051

52+
if TYPE_CHECKING:
53+
from mavedb.lib.authentication import UserData
54+
from mavedb.lib.permissions import Action
55+
5156
VariantData = dict[str, Optional[dict[str, dict]]]
5257

5358
logger = logging.getLogger(__name__)
@@ -69,9 +74,6 @@ def search_score_sets(db: Session, owner_or_contributor: Optional[User], search:
6974
query = db.query(ScoreSet) # \
7075
# .filter(ScoreSet.private.is_(False))
7176

72-
# filter out the score sets that are replaced by other score sets
73-
query = query.filter(~ScoreSet.superseding_score_set.has())
74-
7577
if owner_or_contributor is not None:
7678
query = query.filter(
7779
or_(
@@ -263,6 +265,41 @@ def search_score_sets(db: Session, owner_or_contributor: Optional[User], search:
263265
return score_sets # filter_visible_score_sets(score_sets)
264266

265267

268+
def fetch_superseding_score_set_in_search_result(
    score_sets: list[ScoreSet],
    requesting_user: Optional["UserData"],
    search: ScoreSetsSearch,
) -> list[ScoreSet]:
    """
    Replace each search hit with the version of it that should be shown to the requesting user.

    Each score set in ``score_sets`` is resolved with ``Action.READ`` for ``requesting_user``:
    through ``find_publish_or_private_superseded_score_set_tail`` when ``search.published`` is
    truthy, and through ``find_superseded_score_set_tail`` otherwise. Hits that resolve to
    ``None`` are dropped, duplicates (several hits resolving to the same score set) are
    collapsed, and the remaining score sets are returned sorted by ``urn``.
    """
    # Function-level import; the module only imports Action under TYPE_CHECKING
    # (likely to avoid an import cycle with mavedb.lib.permissions -- confirm).
    from mavedb.lib.permissions import Action

    published_filter = search.published
    resolved = []
    for candidate in score_sets:
        if published_filter:
            tail = find_publish_or_private_superseded_score_set_tail(
                candidate, Action.READ, requesting_user, published_filter
            )
        else:
            tail = find_superseded_score_set_tail(candidate, Action.READ, requesting_user)

        # The resolvers return None when no version in the chain may be shown.
        if tail is not None:
            resolved.append(tail)

    # sorted() over an empty set already yields [], so no separate empty case is needed.
    return sorted(set(resolved), key=attrgetter("urn"))
301+
302+
266303
def find_meta_analyses_for_experiment_sets(db: Session, urns: list[str]) -> list[ScoreSet]:
267304
"""
268305
Find all score sets that are meta-analyses for score sets from a specified collection of experiment sets.
@@ -307,6 +344,60 @@ def find_meta_analyses_for_experiment_sets(db: Session, urns: list[str]) -> list
307344
)
308345

309346

347+
def find_superseded_score_set_tail(
    score_set: ScoreSet,
    action: Optional["Action"] = None,
    user_data: Optional["UserData"] = None) -> Optional[ScoreSet]:
    """
    Find the newest score set in a superseding chain that the user is permitted to act on.

    Starting at ``score_set``, follow ``superseding_score_set`` links for as long as the next
    score set in the chain is permitted. If the score set reached this way is itself not
    permitted, walk back along ``superseded_score_set`` links and return the first permitted
    predecessor instead.

    :param score_set: The score set at which to start walking the chain.
    :param action: The permission to check. When ``None``, no permission checks are made and
        the last score set in the superseding chain is returned.
    :param user_data: The user on whose behalf ``action`` is checked.
    :return: The selected score set, or ``None`` when ``action`` is given and neither the
        reached score set nor any of its predecessors is permitted.
    """
    from mavedb.lib.permissions import has_permission

    def is_permitted(candidate: ScoreSet) -> bool:
        # Without an action there is nothing to check, so every score set qualifies.
        return action is None or has_permission(user_data, candidate, action).permitted

    current = score_set
    while current.superseding_score_set is not None:
        successor = current.superseding_score_set

        # If the next score set in the chain is not permitted, pretend we have reached the end
        # of the chain. We deliberately fall through to the check below instead of returning
        # here, so that a starting score set which is itself not permitted is never handed back.
        if not is_permitted(successor):
            break

        current = successor

    if is_permitted(current):
        return current

    # Handle the case where the end of the chain is not permitted (for example an unpublished
    # superseding score set): fall back to the closest permitted superseded score set.
    while current.superseded_score_set is not None:
        current = current.superseded_score_set
        if is_permitted(current):
            return current

    return None
374+
375+
376+
def find_publish_or_private_superseded_score_set_tail(
    score_set: ScoreSet,
    action: Optional["Action"] = None,
    user_data: Optional["UserData"] = None,
    publish: bool = True) -> Optional[ScoreSet]:
    """
    Select the score set to show for ``score_set`` when results are restricted by publication state.

    With ``publish`` truthy, follow ``superseding_score_set`` links and return the current score
    set as soon as ``action`` is given, the current score set is permitted, and the next one in
    the chain has no ``published_date``; otherwise the last score set in the chain is returned.

    With ``publish`` falsy, return ``score_set`` only when ``action`` is given, ``score_set`` has
    no ``published_date``, and the user is permitted; otherwise return ``None``.
    """
    from mavedb.lib.permissions import has_permission

    if not publish:
        # An unpublished score set should not have a superseding score set (though it may
        # supersede another one), so only the score set itself is considered here.
        if action is None:
            return None
        if score_set.published_date is not None:
            return None
        if not has_permission(user_data, score_set, action).permitted:
            return None
        return score_set

    current = score_set
    successor = current.superseding_score_set
    while successor is not None:
        # Stop at the final published version: the current one is permitted and the next one
        # has not been published yet.
        stop_here = (
            action is not None
            and has_permission(user_data, current, action).permitted
            and successor.published_date is None
        )
        if stop_here:
            return current

        current = successor
        successor = current.superseding_score_set

    return current
399+
400+
310401
def get_score_set_counts_as_csv(
311402
db: Session,
312403
score_set: ScoreSet,

src/mavedb/lib/validation/urn_re.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
MAVEDB_TMP_URN_PATTERN = r"tmp:[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
99
MAVEDB_TMP_URN_RE = re.compile(MAVEDB_TMP_URN_PATTERN)
1010

11+
# Old temp URN
12+
MAVEDB_OLD_TMP_URN_PATTERN = r"^tmp:[A-Za-z0-9]{16}$"
13+
MAVEDB_OLD_TMP_URN_RE = re.compile(MAVEDB_OLD_TMP_URN_PATTERN)
14+
1115
# Experiment set URN
1216
MAVEDB_EXPERIMENT_SET_URN_PATTERN = rf"urn:{MAVEDB_URN_NAMESPACE}:\d{{{MAVEDB_EXPERIMENT_SET_URN_DIGITS}}}"
1317
MAVEDB_EXPERIMENT_SET_URN_RE = re.compile(MAVEDB_EXPERIMENT_SET_URN_PATTERN)

src/mavedb/routers/experiments.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@
2323
from mavedb.lib.keywords import search_keyword
2424
from mavedb.lib.logging import LoggedRoute
2525
from mavedb.lib.logging.context import logging_context, save_to_logging_context
26-
from mavedb.lib.permissions import Action, assert_permission, has_permission
26+
from mavedb.lib.permissions import Action, assert_permission
27+
from mavedb.lib.score_sets import find_superseded_score_set_tail
2728
from mavedb.lib.validation.exceptions import ValidationError
2829
from mavedb.lib.validation.keywords import validate_keyword_list
2930
from mavedb.models.contributor import Contributor
@@ -166,20 +167,25 @@ def get_experiment_score_sets(
166167
.filter(~ScoreSet.superseding_score_set.has())
167168
.all()
168169
)
169-
score_set_result[:] = [
170-
score_set for score_set in score_set_result if has_permission(user_data, score_set, Action.READ).permitted
171-
]
172170

173-
if not score_set_result:
171+
filter_superseded_score_set_tails = [
172+
find_superseded_score_set_tail(
173+
score_set,
174+
Action.READ,
175+
user_data
176+
) for score_set in score_set_result
177+
]
178+
filtered_score_sets = [score_set for score_set in filter_superseded_score_set_tails if score_set is not None]
179+
if not filtered_score_sets:
174180
save_to_logging_context({"associated_resources": []})
175181
logger.info(msg="No score sets are associated with the requested experiment.", extra=logging_context())
176182

177183
raise HTTPException(status_code=404, detail="no associated score sets")
178184
else:
179-
score_set_result.sort(key=attrgetter("urn"))
185+
filtered_score_sets.sort(key=attrgetter("urn"))
180186
save_to_logging_context({"associated_resources": [item.urn for item in score_set_result]})
181187

182-
return score_set_result
188+
return filtered_score_sets
183189

184190

185191
@router.post(

src/mavedb/routers/score_sets.py

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
logging_context,
3535
save_to_logging_context,
3636
)
37-
from mavedb.lib.permissions import Action, assert_permission
37+
from mavedb.lib.permissions import Action, assert_permission, has_permission
3838
from mavedb.lib.score_sets import (
3939
csv_data_to_df,
4040
find_meta_analyses_for_experiment_sets,
@@ -43,6 +43,7 @@
4343
variants_to_csv_rows,
4444
)
4545
from mavedb.lib.score_sets import (
46+
fetch_superseding_score_set_in_search_result,
4647
search_score_sets as _search_score_sets,
4748
refresh_variant_urns,
4849
)
@@ -109,6 +110,10 @@ async def fetch_score_set_by_urn(
109110
raise HTTPException(status_code=404, detail=f"score set with URN '{urn}' not found")
110111

111112
assert_permission(user, item, Action.READ)
113+
114+
if item.superseding_score_set and not has_permission(user, item.superseding_score_set, Action.READ).permitted:
115+
item.superseding_score_set = None
116+
112117
return item
113118

114119

@@ -121,11 +126,16 @@ async def fetch_score_set_by_urn(
121126

122127

123128
@router.post("/score-sets/search", status_code=200, response_model=list[score_set.ShortScoreSet])
124-
def search_score_sets(search: ScoreSetsSearch, db: Session = Depends(deps.get_db)) -> Any: # = Body(..., embed=True),
129+
def search_score_sets(
130+
search: ScoreSetsSearch,
131+
db: Session = Depends(deps.get_db),
132+
user_data: Optional[UserData] = Depends(get_current_user),
133+
) -> Any: # = Body(..., embed=True),
125134
"""
126135
Search score sets.
127136
"""
128-
return _search_score_sets(db, None, search)
137+
score_sets = _search_score_sets(db, None, search)
138+
return fetch_superseding_score_set_in_search_result(score_sets, user_data, search)
129139

130140

131141
@router.post(
@@ -141,7 +151,8 @@ def search_my_score_sets(
141151
"""
142152
Search score sets created by the current user.
143153
"""
144-
return _search_score_sets(db, user_data.user, search)
154+
score_sets = _search_score_sets(db, user_data.user, search)
155+
return fetch_superseding_score_set_in_search_result(score_sets, user_data, search)
145156

146157

147158
@router.get(
@@ -301,10 +312,10 @@ def get_score_set_mapped_variants(
301312

302313
mapped_variants = (
303314
db.query(MappedVariant)
304-
.filter(ScoreSet.urn == urn)
305-
.filter(ScoreSet.id == Variant.score_set_id)
306-
.filter(Variant.id == MappedVariant.variant_id)
307-
.all()
315+
.filter(ScoreSet.urn == urn)
316+
.filter(ScoreSet.id == Variant.score_set_id)
317+
.filter(Variant.id == MappedVariant.variant_id)
318+
.all()
308319
)
309320

310321
if not mapped_variants:
@@ -471,9 +482,10 @@ async def create_score_set(
471482
for identifier in item_create.primary_publication_identifiers or []
472483
]
473484
publication_identifiers = [
474-
await find_or_create_publication_identifier(db, identifier.identifier, identifier.db_name)
475-
for identifier in item_create.secondary_publication_identifiers or []
476-
] + primary_publication_identifiers
485+
await find_or_create_publication_identifier(db, identifier.identifier,
486+
identifier.db_name)
487+
for identifier in item_create.secondary_publication_identifiers or []
488+
] + primary_publication_identifiers
477489

478490
# create a temporary `primary` attribute on each of our publications that indicates
479491
# to our association proxy whether it is a primary publication or not

tests/routers/test_experiments.py

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
TEST_MEDRXIV_IDENTIFIER,
2525
TEST_MINIMAL_EXPERIMENT,
2626
TEST_MINIMAL_EXPERIMENT_RESPONSE,
27+
TEST_MINIMAL_SEQ_SCORESET,
2728
TEST_ORCID_ID,
2829
TEST_PUBMED_IDENTIFIER,
2930
TEST_PUBMED_URL_IDENTIFIER,
@@ -1072,6 +1073,112 @@ def test_search_score_sets_for_experiments(session, client, setup_router_db, dat
10721073
assert response.json()[0]["urn"] == published_score_set["urn"]
10731074

10741075

1076+
# The creator has created a superseding score set but has not published it yet.
1077+
def test_owner_searches_score_sets_with_unpublished_superseding_score_sets_for_experiments(
    session, client, setup_router_db, data_files, data_provider
):
    """The owner listing an experiment's score sets gets the unpublished superseding score set."""
    experiment = create_experiment(client)
    draft_score_set = create_seq_score_set_with_variants(
        client, session, data_provider, experiment["urn"], data_files / "scores.csv"
    )

    publish_response = client.post(f"/api/v1/score-sets/{draft_score_set['urn']}/publish")
    assert publish_response.status_code == 200
    published_score_set = publish_response.json()

    # Create (but do not publish) a score set that supersedes the published one.
    payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET)
    payload["experimentUrn"] = published_score_set["experiment"]["urn"]
    payload["supersededScoreSetUrn"] = published_score_set["urn"]
    create_response = client.post("/api/v1/score-sets/", json=payload)
    assert create_response.status_code == 200
    superseding_score_set = create_response.json()

    # On score set publication, the experiment will get a new urn
    experiment_urn = published_score_set["experiment"]["urn"]
    listing_response = client.get(f"/api/v1/experiments/{experiment_urn}/score-sets")
    assert listing_response.status_code == 200

    listed_score_sets = listing_response.json()
    assert len(listed_score_sets) == 1
    assert listed_score_sets[0]["urn"] == superseding_score_set["urn"]
1098+
1099+
1100+
def test_non_owner_searches_score_sets_with_unpublished_superseding_score_sets_for_experiments(
    session, client, setup_router_db, data_files, data_provider
):
    """After ownership changes, listing an experiment's score sets gives the published, superseded score set."""
    experiment = create_experiment(client)
    draft_score_set = create_seq_score_set_with_variants(
        client, session, data_provider, experiment["urn"], data_files / "scores.csv"
    )

    publish_response = client.post(f"/api/v1/score-sets/{draft_score_set['urn']}/publish")
    assert publish_response.status_code == 200
    published_score_set = publish_response.json()

    # Create (but do not publish) a score set that supersedes the published one.
    payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET)
    payload["experimentUrn"] = published_score_set["experiment"]["urn"]
    payload["supersededScoreSetUrn"] = published_score_set["urn"]
    create_response = client.post("/api/v1/score-sets/", json=payload)
    assert create_response.status_code == 200
    superseding_score_set = create_response.json()

    # Presumably reassigns both score sets away from the requesting user, so the
    # request below is made as a non-owner -- confirm against change_ownership.
    for urn in (published_score_set["urn"], superseding_score_set["urn"]):
        change_ownership(session, urn, ScoreSetDbModel)

    # On score set publication, the experiment will get a new urn
    experiment_urn = published_score_set["experiment"]["urn"]
    listing_response = client.get(f"/api/v1/experiments/{experiment_urn}/score-sets")
    assert listing_response.status_code == 200

    listed_score_sets = listing_response.json()
    assert len(listed_score_sets) == 1
    assert listed_score_sets[0]["urn"] == published_score_set["urn"]
1122+
1123+
1124+
def test_owner_searches_published_superseding_score_sets_for_experiments(
    session, client, setup_router_db, data_files, data_provider
):
    """The owner listing an experiment's score sets gets only the published superseding score set."""
    scores_csv = data_files / "scores.csv"

    experiment = create_experiment(client)
    draft_score_set = create_seq_score_set_with_variants(
        client, session, data_provider, experiment["urn"], scores_csv
    )
    publish_response = client.post(f"/api/v1/score-sets/{draft_score_set['urn']}/publish")
    assert publish_response.status_code == 200
    published_score_set = publish_response.json()

    # Create a score set that supersedes the published one, then publish it as well.
    superseding_draft = create_seq_score_set_with_variants(
        client,
        session,
        data_provider,
        published_score_set["experiment"]["urn"],
        scores_csv,
        update={"supersededScoreSetUrn": published_score_set["urn"]},
    )
    superseding_publish_response = client.post(f"/api/v1/score-sets/{superseding_draft['urn']}/publish")
    assert superseding_publish_response.status_code == 200
    published_superseding_score_set = superseding_publish_response.json()

    # On score set publication, the experiment will get a new urn
    experiment_urn = published_score_set["experiment"]["urn"]
    listing_response = client.get(f"/api/v1/experiments/{experiment_urn}/score-sets")
    assert listing_response.status_code == 200

    listed_score_sets = listing_response.json()
    assert len(listed_score_sets) == 1
    assert listed_score_sets[0]["urn"] == published_superseding_score_set["urn"]
1150+
1151+
1152+
def test_non_owner_searches_published_superseding_score_sets_for_experiments(
    session, client, setup_router_db, data_files, data_provider
):
    """After ownership changes, listing an experiment's score sets still gives the published superseding score set."""
    scores_csv = data_files / "scores.csv"

    experiment = create_experiment(client)
    draft_score_set = create_seq_score_set_with_variants(
        client, session, data_provider, experiment["urn"], scores_csv
    )
    publish_response = client.post(f"/api/v1/score-sets/{draft_score_set['urn']}/publish")
    assert publish_response.status_code == 200
    published_score_set = publish_response.json()

    # Create a score set that supersedes the published one, then publish it as well.
    superseding_draft = create_seq_score_set_with_variants(
        client,
        session,
        data_provider,
        published_score_set["experiment"]["urn"],
        scores_csv,
        update={"supersededScoreSetUrn": published_score_set["urn"]},
    )
    superseding_publish_response = client.post(f"/api/v1/score-sets/{superseding_draft['urn']}/publish")
    assert superseding_publish_response.status_code == 200
    published_superseding_score_set = superseding_publish_response.json()

    # Presumably reassigns both score sets away from the requesting user, so the
    # request below is made as a non-owner -- confirm against change_ownership.
    for urn in (published_score_set["urn"], published_superseding_score_set["urn"]):
        change_ownership(session, urn, ScoreSetDbModel)

    # On score set publication, the experiment will get a new urn
    experiment_urn = published_score_set["experiment"]["urn"]
    listing_response = client.get(f"/api/v1/experiments/{experiment_urn}/score-sets")
    assert listing_response.status_code == 200

    listed_score_sets = listing_response.json()
    assert len(listed_score_sets) == 1
    assert listed_score_sets[0]["urn"] == published_superseding_score_set["urn"]
1180+
1181+
10751182
def test_search_score_sets_for_contributor_experiments(session, client, setup_router_db, data_files, data_provider):
10761183
experiment = create_experiment(client)
10771184
score_set_pub = create_seq_score_set_with_variants(

0 commit comments

Comments
 (0)