Merge pull request #366 from VariantEffect/estelle/debugShowTmpSupersedingScoreSet

EstelleDa · web-flow · commit c95060d8377c · 2025-02-19T17:49:08.000+11:00
Fix current version bug
diff --git a/src/mavedb/lib/score_sets.py b/src/mavedb/lib/score_sets.py
@@ -2,7 +2,8 @@
 import io
 import logging
 import re
-from typing import Any, BinaryIO, Iterable, Optional, Sequence
+from operator import attrgetter
+from typing import Any, BinaryIO, Iterable, Optional, TYPE_CHECKING, Sequence
 
 import numpy as np
 import pandas as pd
@@ -48,6 +49,10 @@
 from mavedb.models.variant import Variant
 from mavedb.view_models.search import ScoreSetsSearch
 
+if TYPE_CHECKING:
+    from mavedb.lib.authentication import UserData
+    from mavedb.lib.permissions import Action
+
 VariantData = dict[str, Optional[dict[str, dict]]]
 
 logger = logging.getLogger(__name__)
@@ -69,9 +74,6 @@ def search_score_sets(db: Session, owner_or_contributor: Optional[User], search:
     query = db.query(ScoreSet)  # \
     # .filter(ScoreSet.private.is_(False))
 
-    #  filter out the score sets that are replaced by other score sets
-    query = query.filter(~ScoreSet.superseding_score_set.has())
-
     if owner_or_contributor is not None:
         query = query.filter(
             or_(
@@ -263,6 +265,41 @@ def search_score_sets(db: Session, owner_or_contributor: Optional[User], search:
     return score_sets  # filter_visible_score_sets(score_sets)
 
 
+def fetch_superseding_score_set_in_search_result(
+    score_sets: list[ScoreSet],
+    requesting_user: Optional["UserData"],
+    search: ScoreSetsSearch) -> list[ScoreSet]:
+    """
+    Remove superseded score sets from search results.
+    Check that all of the returned score sets are the correct versions.
+    """
+    from mavedb.lib.permissions import Action
+    if search.published:
+        filtered_score_sets_tail = [
+            find_publish_or_private_superseded_score_set_tail(
+                score_set,
+                Action.READ,
+                requesting_user,
+                search.published
+            ) for score_set in score_sets
+        ]
+    else:
+        filtered_score_sets_tail = [
+            find_superseded_score_set_tail(
+                score_set,
+                Action.READ,
+                requesting_user
+            ) for score_set in score_sets
+        ]
+    # Remove None items.
+    filtered_score_sets = [score_set for score_set in filtered_score_sets_tail if score_set is not None]
+    if filtered_score_sets:
+        final_score_sets = sorted(set(filtered_score_sets), key=attrgetter("urn"))
+    else:
+        final_score_sets = []
+    return final_score_sets
+
+
 def find_meta_analyses_for_experiment_sets(db: Session, urns: list[str]) -> list[ScoreSet]:
     """
     Find all score sets that are meta-analyses for score sets from a specified collection of experiment sets.
@@ -307,6 +344,60 @@ def find_meta_analyses_for_experiment_sets(db: Session, urns: list[str]) -> list
     )
 
 
+def find_superseded_score_set_tail(
+        score_set: ScoreSet,
+        action: Optional["Action"] = None,
+        user_data: Optional["UserData"] = None) -> Optional[ScoreSet]:
+    from mavedb.lib.permissions import has_permission
+    while score_set.superseding_score_set is not None:
+        next_score_set_in_chain = score_set.superseding_score_set
+
+        # If we were given a permission to check and the next score set in the chain does not have that permission,
+        # pretend like we have reached the end of the chain. Otherwise, continue to the next score set.
+        if action is not None and not has_permission(user_data, next_score_set_in_chain, action).permitted:
+            return score_set
+
+        score_set = next_score_set_in_chain
+
+    # Handle the unpublished superseding score set case.
+    # The score set has a published superseded score set but does not have a superseding score set.
+    if action is not None and not has_permission(user_data, score_set, action).permitted:
+        while score_set.superseded_score_set is not None:
+            next_score_set_in_chain = score_set.superseded_score_set
+            if has_permission(user_data, next_score_set_in_chain, action).permitted:
+                return next_score_set_in_chain
+            else:
+                score_set = next_score_set_in_chain
+        return None
+
+    return score_set
+
+
+def find_publish_or_private_superseded_score_set_tail(
+        score_set: ScoreSet,
+        action: Optional["Action"] = None,
+        user_data: Optional["UserData"] = None,
+        publish: bool = True) -> Optional[ScoreSet]:
+    from mavedb.lib.permissions import has_permission
+    if publish:
+        while score_set.superseding_score_set is not None:
+            next_score_set_in_chain = score_set.superseding_score_set
+            # Find the final published one.
+            if action is not None and has_permission(user_data, score_set, action).permitted \
+                    and next_score_set_in_chain.published_date is None:
+                return score_set
+            score_set = next_score_set_in_chain
+    else:
+        # An unpublished score set should not be superseded.
+        # It should not have a superseding score set, but it may have a superseded score set.
+        if action is not None and score_set.published_date is None \
+                and has_permission(user_data, score_set, action).permitted:
+            return score_set
+        else:
+            return None
+    return score_set
+
+
 def get_score_set_counts_as_csv(
     db: Session,
     score_set: ScoreSet,
diff --git a/src/mavedb/lib/validation/urn_re.py b/src/mavedb/lib/validation/urn_re.py
@@ -8,6 +8,10 @@
 MAVEDB_TMP_URN_PATTERN = r"tmp:[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
 MAVEDB_TMP_URN_RE = re.compile(MAVEDB_TMP_URN_PATTERN)
 
+# Old temp URN
+MAVEDB_OLD_TMP_URN_PATTERN = r"^tmp:[A-Za-z0-9]{16}$"
+MAVEDB_OLD_TMP_URN_RE = re.compile(MAVEDB_OLD_TMP_URN_PATTERN)
+
 # Experiment set URN
 MAVEDB_EXPERIMENT_SET_URN_PATTERN = rf"urn:{MAVEDB_URN_NAMESPACE}:\d{{{MAVEDB_EXPERIMENT_SET_URN_DIGITS}}}"
 MAVEDB_EXPERIMENT_SET_URN_RE = re.compile(MAVEDB_EXPERIMENT_SET_URN_PATTERN)
diff --git a/src/mavedb/routers/experiments.py b/src/mavedb/routers/experiments.py
@@ -23,7 +23,8 @@
 from mavedb.lib.keywords import search_keyword
 from mavedb.lib.logging import LoggedRoute
 from mavedb.lib.logging.context import logging_context, save_to_logging_context
-from mavedb.lib.permissions import Action, assert_permission, has_permission
+from mavedb.lib.permissions import Action, assert_permission
+from mavedb.lib.score_sets import find_superseded_score_set_tail
 from mavedb.lib.validation.exceptions import ValidationError
 from mavedb.lib.validation.keywords import validate_keyword_list
 from mavedb.models.contributor import Contributor
@@ -166,20 +167,25 @@ def get_experiment_score_sets(
         .filter(~ScoreSet.superseding_score_set.has())
         .all()
     )
-    score_set_result[:] = [
-        score_set for score_set in score_set_result if has_permission(user_data, score_set, Action.READ).permitted
-    ]
 
-    if not score_set_result:
+    filter_superseded_score_set_tails = [
+        find_superseded_score_set_tail(
+            score_set,
+            Action.READ,
+            user_data
+        ) for score_set in score_set_result
+    ]
+    filtered_score_sets = [score_set for score_set in filter_superseded_score_set_tails if score_set is not None]
+    if not filtered_score_sets:
         save_to_logging_context({"associated_resources": []})
         logger.info(msg="No score sets are associated with the requested experiment.", extra=logging_context())
 
         raise HTTPException(status_code=404, detail="no associated score sets")
     else:
-        score_set_result.sort(key=attrgetter("urn"))
+        filtered_score_sets.sort(key=attrgetter("urn"))
         save_to_logging_context({"associated_resources": [item.urn for item in score_set_result]})
 
-    return score_set_result
+    return filtered_score_sets
 
 
 @router.post(
diff --git a/src/mavedb/routers/score_sets.py b/src/mavedb/routers/score_sets.py
@@ -34,7 +34,7 @@
     logging_context,
     save_to_logging_context,
 )
-from mavedb.lib.permissions import Action, assert_permission
+from mavedb.lib.permissions import Action, assert_permission, has_permission
 from mavedb.lib.score_sets import (
     csv_data_to_df,
     find_meta_analyses_for_experiment_sets,
@@ -43,6 +43,7 @@
     variants_to_csv_rows,
 )
 from mavedb.lib.score_sets import (
+    fetch_superseding_score_set_in_search_result,
     search_score_sets as _search_score_sets,
     refresh_variant_urns,
 )
@@ -109,6 +110,10 @@ async def fetch_score_set_by_urn(
         raise HTTPException(status_code=404, detail=f"score set with URN '{urn}' not found")
 
     assert_permission(user, item, Action.READ)
+
+    if item.superseding_score_set and not has_permission(user, item.superseding_score_set, Action.READ).permitted:
+        item.superseding_score_set = None
+
     return item
 
 
@@ -121,11 +126,16 @@ async def fetch_score_set_by_urn(
 
 
 @router.post("/score-sets/search", status_code=200, response_model=list[score_set.ShortScoreSet])
-def search_score_sets(search: ScoreSetsSearch, db: Session = Depends(deps.get_db)) -> Any:  # = Body(..., embed=True),
+def search_score_sets(
+    search: ScoreSetsSearch,
+    db: Session = Depends(deps.get_db),
+    user_data: Optional[UserData] = Depends(get_current_user),
+) -> Any:  # = Body(..., embed=True),
     """
     Search score sets.
     """
-    return _search_score_sets(db, None, search)
+    score_sets = _search_score_sets(db, None, search)
+    return fetch_superseding_score_set_in_search_result(score_sets, user_data, search)
 
 
 @router.post(
@@ -141,7 +151,8 @@ def search_my_score_sets(
     """
     Search score sets created by the current user..
     """
-    return _search_score_sets(db, user_data.user, search)
+    score_sets = _search_score_sets(db, user_data.user, search)
+    return fetch_superseding_score_set_in_search_result(score_sets, user_data, search)
 
 
 @router.get(
@@ -301,10 +312,10 @@ def get_score_set_mapped_variants(
 
     mapped_variants = (
         db.query(MappedVariant)
-        .filter(ScoreSet.urn == urn)
-        .filter(ScoreSet.id == Variant.score_set_id)
-        .filter(Variant.id == MappedVariant.variant_id)
-        .all()
+            .filter(ScoreSet.urn == urn)
+            .filter(ScoreSet.id == Variant.score_set_id)
+            .filter(Variant.id == MappedVariant.variant_id)
+            .all()
     )
 
     if not mapped_variants:
@@ -471,9 +482,10 @@ async def create_score_set(
         for identifier in item_create.primary_publication_identifiers or []
     ]
     publication_identifiers = [
-        await find_or_create_publication_identifier(db, identifier.identifier, identifier.db_name)
-        for identifier in item_create.secondary_publication_identifiers or []
-    ] + primary_publication_identifiers
+                                  await find_or_create_publication_identifier(db, identifier.identifier,
+                                                                              identifier.db_name)
+                                  for identifier in item_create.secondary_publication_identifiers or []
+                              ] + primary_publication_identifiers
 
     # create a temporary `primary` attribute on each of our publications that indicates
     # to our association proxy whether it is a primary publication or not
diff --git a/tests/routers/test_experiments.py b/tests/routers/test_experiments.py
@@ -24,6 +24,7 @@
     TEST_MEDRXIV_IDENTIFIER,
     TEST_MINIMAL_EXPERIMENT,
     TEST_MINIMAL_EXPERIMENT_RESPONSE,
+    TEST_MINIMAL_SEQ_SCORESET,
     TEST_ORCID_ID,
     TEST_PUBMED_IDENTIFIER,
     TEST_PUBMED_URL_IDENTIFIER,
@@ -1072,6 +1073,112 @@ def test_search_score_sets_for_experiments(session, client, setup_router_db, dat
     assert response.json()[0]["urn"] == published_score_set["urn"]
 
 
+# The creator created a superseding score set but has not published it yet.
+def test_owner_searches_score_sets_with_unpublished_superseding_score_sets_for_experiments(session, client, setup_router_db, data_files, data_provider):
+    experiment = create_experiment(client)
+    unpublished_score_set = create_seq_score_set_with_variants(
+        client, session, data_provider, experiment["urn"], data_files / "scores.csv"
+    )
+    publish_score_set_response = client.post(f"/api/v1/score-sets/{unpublished_score_set['urn']}/publish")
+    assert publish_score_set_response.status_code == 200
+    published_score_set = publish_score_set_response.json()
+    score_set_post_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET)
+    score_set_post_payload["experimentUrn"] = published_score_set["experiment"]["urn"]
+    score_set_post_payload["supersededScoreSetUrn"] = published_score_set["urn"]
+    superseding_score_set_response = client.post("/api/v1/score-sets/", json=score_set_post_payload)
+    assert superseding_score_set_response.status_code == 200
+    superseding_score_set = superseding_score_set_response.json()
+
+    # On score set publication, the experiment will get a new urn
+    experiment_urn = published_score_set["experiment"]["urn"]
+    response = client.get(f"/api/v1/experiments/{experiment_urn}/score-sets")
+    assert response.status_code == 200
+    assert len(response.json()) == 1
+    assert response.json()[0]["urn"] == superseding_score_set["urn"]
+
+
+def test_non_owner_searches_score_sets_with_unpublished_superseding_score_sets_for_experiments(session, client, setup_router_db, data_files, data_provider):
+    experiment = create_experiment(client)
+    unpublished_score_set = create_seq_score_set_with_variants(
+        client, session, data_provider, experiment["urn"], data_files / "scores.csv"
+    )
+    publish_score_set_response = client.post(f"/api/v1/score-sets/{unpublished_score_set['urn']}/publish")
+    assert publish_score_set_response.status_code == 200
+    published_score_set = publish_score_set_response.json()
+    score_set_post_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET)
+    score_set_post_payload["experimentUrn"] = published_score_set["experiment"]["urn"]
+    score_set_post_payload["supersededScoreSetUrn"] = published_score_set["urn"]
+    superseding_score_set_response = client.post("/api/v1/score-sets/", json=score_set_post_payload)
+    assert superseding_score_set_response.status_code == 200
+    superseding_score_set = superseding_score_set_response.json()
+    change_ownership(session, published_score_set["urn"], ScoreSetDbModel)
+    change_ownership(session, superseding_score_set["urn"], ScoreSetDbModel)
+    # On score set publication, the experiment will get a new urn
+    experiment_urn = published_score_set["experiment"]["urn"]
+    response = client.get(f"/api/v1/experiments/{experiment_urn}/score-sets")
+    assert response.status_code == 200
+    assert len(response.json()) == 1
+    assert response.json()[0]["urn"] == published_score_set["urn"]
+
+
+def test_owner_searches_published_superseding_score_sets_for_experiments(session, client, setup_router_db, data_files, data_provider):
+    experiment = create_experiment(client)
+    unpublished_score_set = create_seq_score_set_with_variants(
+        client, session, data_provider, experiment["urn"], data_files / "scores.csv"
+    )
+    publish_score_set_response = client.post(f"/api/v1/score-sets/{unpublished_score_set['urn']}/publish")
+    assert publish_score_set_response.status_code == 200
+    published_score_set = publish_score_set_response.json()
+
+    superseding_score_set = create_seq_score_set_with_variants(
+        client,
+        session,
+        data_provider,
+        published_score_set["experiment"]["urn"],
+        data_files / "scores.csv",
+        update={"supersededScoreSetUrn": published_score_set["urn"]},
+    )
+    published_superseding_score_set_response = client.post(f"/api/v1/score-sets/{superseding_score_set['urn']}/publish")
+    assert published_superseding_score_set_response.status_code == 200
+    published_superseding_score_set = published_superseding_score_set_response.json()
+    # On score set publication, the experiment will get a new urn
+    experiment_urn = published_score_set["experiment"]["urn"]
+    response = client.get(f"/api/v1/experiments/{experiment_urn}/score-sets")
+    assert response.status_code == 200
+    assert len(response.json()) == 1
+    assert response.json()[0]["urn"] == published_superseding_score_set["urn"]
+
+
+def test_non_owner_searches_published_superseding_score_sets_for_experiments(session, client, setup_router_db, data_files, data_provider):
+    experiment = create_experiment(client)
+    unpublished_score_set = create_seq_score_set_with_variants(
+        client, session, data_provider, experiment["urn"], data_files / "scores.csv"
+    )
+    publish_score_set_response = client.post(f"/api/v1/score-sets/{unpublished_score_set['urn']}/publish")
+    assert publish_score_set_response.status_code == 200
+    published_score_set = publish_score_set_response.json()
+
+    superseding_score_set = create_seq_score_set_with_variants(
+        client,
+        session,
+        data_provider,
+        published_score_set["experiment"]["urn"],
+        data_files / "scores.csv",
+        update={"supersededScoreSetUrn": published_score_set["urn"]},
+    )
+    published_superseding_score_set_response = client.post(f"/api/v1/score-sets/{superseding_score_set['urn']}/publish")
+    assert published_superseding_score_set_response.status_code == 200
+    published_superseding_score_set = published_superseding_score_set_response.json()
+    change_ownership(session, published_score_set["urn"], ScoreSetDbModel)
+    change_ownership(session, published_superseding_score_set["urn"], ScoreSetDbModel)
+    # On score set publication, the experiment will get a new urn
+    experiment_urn = published_score_set["experiment"]["urn"]
+    response = client.get(f"/api/v1/experiments/{experiment_urn}/score-sets")
+    assert response.status_code == 200
+    assert len(response.json()) == 1
+    assert response.json()[0]["urn"] == published_superseding_score_set["urn"]
+
+
 def test_search_score_sets_for_contributor_experiments(session, client, setup_router_db, data_files, data_provider):
     experiment = create_experiment(client)
     score_set_pub = create_seq_score_set_with_variants(
diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py