 import io
 import logging
 import re
-from typing import Any, BinaryIO, Iterable, Optional, Sequence
+from operator import attrgetter
+from typing import Any, BinaryIO, Iterable, Optional, Sequence, TYPE_CHECKING
 
 import numpy as np
 import pandas as pd
 )
 from mavedb.lib.mave.utils import is_csv_null
 from mavedb.lib.validation.constants.general import null_values_list
+from mavedb.lib.validation.utilities import is_null as validate_is_null
 from mavedb.models.contributor import Contributor
 from mavedb.models.controlled_keyword import ControlledKeyword
 from mavedb.models.doi_identifier import DoiIdentifier
 from mavedb.models.variant import Variant
 from mavedb.view_models.search import ScoreSetsSearch
 
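+# These imports are used only in type annotations; at runtime the same modules are
+# imported lazily inside the functions below, presumably to avoid a circular import.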
+if TYPE_CHECKING:
+    from mavedb.lib.authentication import UserData
+    from mavedb.lib.permissions import Action
+
 VariantData = dict[str, Optional[dict[str, dict]]]
 
 logger = logging.getLogger(__name__)
@@ -68,9 +74,6 @@ def search_score_sets(db: Session, owner_or_contributor: Optional[User], search:
     query = db.query(ScoreSet)  # \
     # .filter(ScoreSet.private.is_(False))
 
-    # filter out the score sets that are replaced by other score sets
-    query = query.filter(~ScoreSet.superseding_score_set.has())
-
     if owner_or_contributor is not None:
         query = query.filter(
             or_(
@@ -262,6 +265,41 @@ def search_score_sets(db: Session, owner_or_contributor: Optional[User], search:
     return score_sets  # filter_visible_score_sets(score_sets)
 
 
+def fetch_superseding_score_set_in_search_result(
+    score_sets: list[ScoreSet],
+    requesting_user: Optional["UserData"],
+    search: ScoreSetsSearch,
+) -> list[ScoreSet]:
+    """
+    Remove superseded score sets from search results, so that each result is the
+    version of its superseding chain the requesting user should see.
+    """
+    from mavedb.lib.permissions import Action
+
+    if search.published:
+        filtered_score_sets_tail = [
+            find_publish_or_private_superseded_score_set_tail(
+                score_set,
+                Action.READ,
+                requesting_user,
+                search.published,
+            )
+            for score_set in score_sets
+        ]
+    else:
+        filtered_score_sets_tail = [
+            find_superseded_score_set_tail(
+                score_set,
+                Action.READ,
+                requesting_user,
+            )
+            for score_set in score_sets
+        ]
+    # Drop None entries (chains with no version visible to this user).
+    filtered_score_sets = [score_set for score_set in filtered_score_sets_tail if score_set is not None]
+    # Several results may resolve to the same tail, so deduplicate and sort by URN.
+    if filtered_score_sets:
+        final_score_sets = sorted(set(filtered_score_sets), key=attrgetter("urn"))
+    else:
+        final_score_sets = []
+    return final_score_sets
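+
+
+# A minimal usage sketch (hypothetical `db`, `user_data`, and `search` objects), assuming
+# this helper is applied to the output of search_score_sets():
+#
+#     score_sets = search_score_sets(db, None, search)
+#     score_sets = fetch_superseding_score_set_in_search_result(score_sets, user_data, search)
+
+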
 def find_meta_analyses_for_experiment_sets(db: Session, urns: list[str]) -> list[ScoreSet]:
     """
     Find all score sets that are meta-analyses for score sets from a specified collection of experiment sets.
@@ -306,11 +344,66 @@ def find_meta_analyses_for_experiment_sets(db: Session, urns: list[str]) -> list
     )
 
 
+def find_superseded_score_set_tail(
+    score_set: ScoreSet,
+    action: Optional["Action"] = None,
+    user_data: Optional["UserData"] = None,
+) -> Optional[ScoreSet]:
+    from mavedb.lib.permissions import has_permission
+
+    while score_set.superseding_score_set is not None:
+        next_score_set_in_chain = score_set.superseding_score_set
+
+        # If we were given a permission to check and the user lacks it on the next score
+        # set in the chain, treat this as the end of the chain. Otherwise, continue on.
+        if action is not None and not has_permission(user_data, next_score_set_in_chain, action).permitted:
+            return score_set
+
+        score_set = next_score_set_in_chain
+
+    # Handle an unpublished superseding score set: if the end of the chain is not visible
+    # to this user, walk back down the superseded chain to the most recent visible version.
+    if action is not None and not has_permission(user_data, score_set, action).permitted:
+        while score_set.superseded_score_set is not None:
+            next_score_set_in_chain = score_set.superseded_score_set
+            if has_permission(user_data, next_score_set_in_chain, action).permitted:
+                return next_score_set_in_chain
+            else:
+                score_set = next_score_set_in_chain
+        return None
+
+    return score_set
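+
+
+# Sketch of the walk above, with a hypothetical chain A -> B -> C (C newest): starting
+# from A, the loop climbs superseding links and returns the newest version the user may
+# read (C, or B if C is not readable). If no version upward is readable, the second loop
+# falls back down the superseded links toward A.
+
+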
+def find_publish_or_private_superseded_score_set_tail(
+    score_set: ScoreSet,
+    action: Optional["Action"] = None,
+    user_data: Optional["UserData"] = None,
+    publish: bool = True,
+) -> Optional[ScoreSet]:
+    from mavedb.lib.permissions import has_permission
+
+    if publish:
+        while score_set.superseding_score_set is not None:
+            next_score_set_in_chain = score_set.superseding_score_set
+            # Stop at the final published version: if the next score set in the chain is
+            # unpublished, return the current (readable) one.
+            if (
+                action is not None
+                and has_permission(user_data, score_set, action).permitted
+                and next_score_set_in_chain.published_date is None
+            ):
+                return score_set
+            score_set = next_score_set_in_chain
+    else:
+        # An unpublished score set should never be superseded: it should have no
+        # superseding score set, though it may itself supersede another score set.
+        if (
+            action is not None
+            and score_set.published_date is None
+            and has_permission(user_data, score_set, action).permitted
+        ):
+            return score_set
+        else:
+            return None
+    return score_set
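+
+
+# Note on the two modes above: with publish=True the walk yields, roughly, the newest
+# published version of the chain the user may read; with publish=False a score set is
+# returned only if it is itself unpublished and readable, otherwise None.
+
+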
 def get_score_set_counts_as_csv(
     db: Session,
     score_set: ScoreSet,
     start: Optional[int] = None,
     limit: Optional[int] = None,
+    drop_na_columns: Optional[bool] = None,
 ) -> str:
     assert type(score_set.dataset_columns) is dict
     count_columns = [str(x) for x in list(score_set.dataset_columns.get("count_columns", []))]
@@ -329,6 +422,9 @@ def get_score_set_counts_as_csv(
     variants = db.scalars(variants_query).all()
 
     rows_data = variants_to_csv_rows(variants, columns=columns, dtype=type_column)
+    if drop_na_columns:
+        rows_data, columns = drop_na_columns_from_csv_file_rows(rows_data, columns)
+
     stream = io.StringIO()
     writer = csv.DictWriter(stream, fieldnames=columns, quoting=csv.QUOTE_MINIMAL)
     writer.writeheader()
@@ -341,6 +437,7 @@ def get_score_set_scores_as_csv(
     score_set: ScoreSet,
     start: Optional[int] = None,
     limit: Optional[int] = None,
+    drop_na_columns: Optional[bool] = None,
 ) -> str:
     assert type(score_set.dataset_columns) is dict
     score_columns = [str(x) for x in list(score_set.dataset_columns.get("score_columns", []))]
@@ -359,13 +456,38 @@ def get_score_set_scores_as_csv(
     variants = db.scalars(variants_query).all()
 
     rows_data = variants_to_csv_rows(variants, columns=columns, dtype=type_column)
+    if drop_na_columns:
+        rows_data, columns = drop_na_columns_from_csv_file_rows(rows_data, columns)
+
     stream = io.StringIO()
     writer = csv.DictWriter(stream, fieldnames=columns, quoting=csv.QUOTE_MINIMAL)
     writer.writeheader()
     writer.writerows(rows_data)
     return stream.getvalue()
 
 
+def drop_na_columns_from_csv_file_rows(
+    rows_data: Iterable[dict[str, Any]],
+    columns: list[str],
+) -> tuple[list[dict[str, Any]], list[str]]:
+    """Prepare rows_data for a downloadable CSV by removing HGVS columns that are entirely empty."""
+    # Materialize the iterable (it may be a lazy map) so it can be traversed more than once.
+    rows_data = list(rows_data)
+    columns_to_check = ["hgvs_nt", "hgvs_splice", "hgvs_pro"]
+    columns_to_remove = []
+
+    # A column is removed only when every one of its values is null (None, "NA", etc.).
+    for col in columns_to_check:
+        if all(validate_is_null(row[col]) for row in rows_data):
+            columns_to_remove.append(col)
+            for row in rows_data:
+                row.pop(col, None)  # Remove the column from each row
+
+    # Remove the dropped columns from the header list.
+    columns = [col for col in columns if col not in columns_to_remove]
+    return rows_data, columns
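+
+
+# Example with hypothetical data: given rows [{"hgvs_nt": "NA", "score": "1.0"},
+# {"hgvs_nt": None, "score": "2.0"}] and columns ["hgvs_nt", "score"], every hgvs_nt
+# value is null, so the rows are returned without the "hgvs_nt" key and the header
+# list becomes ["score"].
+
+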
 null_values_re = re.compile(r"\s+|none|nan|na|undefined|n/a|null|nil", flags=re.IGNORECASE)
 
 