3535from mavedb .models .experiment_controlled_keyword import ExperimentControlledKeywordAssociation
3636from mavedb .models .experiment_publication_identifier import ExperimentPublicationIdentifierAssociation
3737from mavedb .models .experiment_set import ExperimentSet
38+ from mavedb .models .gnomad_variant import GnomADVariant
3839from mavedb .models .publication_identifier import PublicationIdentifier
3940from mavedb .models .refseq_identifier import RefseqIdentifier
4041from mavedb .models .refseq_offset import RefseqOffset
@@ -407,6 +408,8 @@ def get_score_set_variants_as_csv(
407408 drop_na_columns : Optional [bool ] = None ,
408409 include_custom_columns : bool = True ,
409410 include_post_mapped_hgvs : bool = False ,
411+ include_gnomad : bool = False ,
412+ gnomad_version : Optional [Literal ["v4.1" ]] = None ,
410413) -> str :
411414 """
412415 Get the variant data from a score set as a CSV string.
@@ -430,6 +433,8 @@ def get_score_set_variants_as_csv(
430433 include_post_mapped_hgvs : bool, optional
431434 Whether to include post-mapped HGVS notations in the output. Defaults to False. If True, the output will include
432435 columns for both post-mapped HGVS genomic (g.) and protein (p.) notations.
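436+ include_gnomad : bool, optional
437+ Whether to include gnomAD allele frequency data in the output. Defaults to False. If True, the output will include a gnomad_af column, left empty for variants without a matching gnomAD record; the gnomAD release is selected by gnomad_version, which falls back to "v4.1" when not provided.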
433438
434439 Returns
435440 -------
@@ -444,6 +449,8 @@ def get_score_set_variants_as_csv(
444449 if include_post_mapped_hgvs :
445450 columns .append ("post_mapped_hgvs_g" )
446451 columns .append ("post_mapped_hgvs_p" )
452+ if include_gnomad :
453+ columns .append ("gnomad_af" )
447454
448455 if include_custom_columns :
449456 custom_columns = [str (x ) for x in list (score_set .dataset_columns .get (custom_columns_set , []))]
@@ -453,8 +460,45 @@ def get_score_set_variants_as_csv(
453460
454461 variants : Sequence [Variant ] = []
455462 mappings : Optional [list [Optional [MappedVariant ]]] = None
463+ gnomad_data : Optional [list [Optional [GnomADVariant ]]] = None
456464
457- if include_post_mapped_hgvs :
465+ if include_gnomad :
466+ variants_mappings_and_gnomad_query = (
467+ select (Variant , MappedVariant , GnomADVariant )
468+ .join (
469+ MappedVariant ,
470+ and_ (Variant .id == MappedVariant .variant_id , MappedVariant .current .is_ (True )),
471+ isouter = True ,
472+ )
473+ .join (MappedVariant .gnomad_variants .of_type (GnomADVariant ), isouter = True )
474+ .where (
475+ and_ (
476+ Variant .score_set_id == score_set .id ,
477+ or_ (
478+ and_ (
479+ GnomADVariant .db_name == "gnomAD" ,
480+ GnomADVariant .db_version == (gnomad_version if gnomad_version else "v4.1" ),
481+ ),
482+ GnomADVariant .id .is_ (None ),
483+ ),
484+ )
485+ )
486+ .order_by (cast (func .split_part (Variant .urn , "#" , 2 ), Integer ))
487+ )
488+ if start :
489+ variants_mappings_and_gnomad_query = variants_mappings_and_gnomad_query .offset (start )
490+ if limit :
491+ variants_mappings_and_gnomad_query = variants_mappings_and_gnomad_query .limit (limit )
492+ variants_mappings_and_gnomad = db .execute (variants_mappings_and_gnomad_query ).all ()
493+
494+ variants = []
495+ mappings = []
496+ gnomad_data = []
497+ for variant , mapping , gnomad in variants_mappings_and_gnomad :
498+ variants .append (variant )
499+ mappings .append (mapping )
500+ gnomad_data .append (gnomad )
501+ elif include_post_mapped_hgvs :
458502 variants_and_mappings_query = (
459503 select (Variant , MappedVariant )
460504 .join (
@@ -488,7 +532,9 @@ def get_score_set_variants_as_csv(
488532 variants_query = variants_query .limit (limit )
489533 variants = db .scalars (variants_query ).all ()
490534
491- rows_data = variants_to_csv_rows (variants , columns = columns , dtype = type_column , mappings = mappings ) # type: ignore
535+ rows_data = variants_to_csv_rows (
536+ variants , columns = columns , dtype = type_column , mappings = mappings , gnomad_data = gnomad_data
537+ ) # type: ignore
492538 if drop_na_columns :
493539 rows_data , columns = drop_na_columns_from_csv_file_rows (rows_data , columns )
494540
@@ -534,6 +580,7 @@ def variant_to_csv_row(
534580 columns : list [str ],
535581 dtype : str ,
536582 mapping : Optional [MappedVariant ] = None ,
583+ gnomad_data : Optional [GnomADVariant ] = None ,
537584 na_rep = "NA" ,
538585) -> dict [str , Any ]:
539586 """
@@ -547,6 +594,10 @@ def variant_to_csv_row(
547594 Columns to serialize.
548595 dtype : str, {'scores', 'counts'}
549596 The type of data requested. Either the 'score_data' or 'count_data'.
597+ mapping : variant.models.MappedVariant, optional
598+ Mapped variant corresponding to the variant.
599+ gnomad_data : variant.models.GnomADVariant, optional
600+ gnomAD variant data corresponding to the variant.
550601 na_rep : str
551602 String to represent null values.
552603
@@ -576,6 +627,8 @@ def variant_to_csv_row(
576627 value = hgvs_str
577628 else :
578629 value = ""
630+ elif column_key == "gnomad_af" :
631+ value = str (gnomad_data .allele_frequency ) if gnomad_data else ""
579632 else :
580633 parent = variant .data .get (dtype ) if variant .data else None
581634 value = str (parent .get (column_key )) if parent else na_rep
@@ -591,6 +644,7 @@ def variants_to_csv_rows(
591644 columns : list [str ],
592645 dtype : str ,
593646 mappings : Optional [Sequence [Optional [MappedVariant ]]] = None ,
647+ gnomad_data : Optional [Sequence [Optional [GnomADVariant ]]] = None ,
594648 na_rep = "NA" ,
595649) -> Iterable [dict [str , Any ]]:
596650 """
@@ -604,14 +658,25 @@ def variants_to_csv_rows(
604658 Columns to serialize.
605659 dtype : str, {'scores', 'counts'}
606660 The type of data requested. Either the 'score_data' or 'count_data'.
661+ mappings : list[Optional[variant.models.MappedVariant]], optional
662+ List of mapped variants corresponding to the variants.
663+ gnomad_data : list[Optional[variant.models.GnomADVariant]], optional
664+ List of gnomAD variant data corresponding to the variants.
607665 na_rep : str
608666 String to represent null values.
609667
610668 Returns
611669 -------
612670 list[dict[str, Any]]
613671 """
614- if mappings is not None :
672+ if mappings is not None and gnomad_data is not None :
673+ return map (
674+ lambda zipped : variant_to_csv_row (
675+ zipped [0 ], columns , dtype , mapping = zipped [1 ], gnomad_data = zipped [2 ], na_rep = na_rep
676+ ),
677+ zip (variants , mappings , gnomad_data ),
678+ )
679+ elif mappings is not None :
615680 return map (
616681 lambda pair : variant_to_csv_row (pair [0 ], columns , dtype , mapping = pair [1 ], na_rep = na_rep ),
617682 zip (variants , mappings ),
0 commit comments