Skip to content

Commit 7d0fc6e

Browse files
committed
Add a boolean `namespaced` parameter to the variant CSV helpers and update their callers.
1 parent 96149bd commit 7d0fc6e

File tree

2 files changed

+30
-22
lines changed

2 files changed

+30
-22
lines changed

src/mavedb/lib/score_sets.py

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,7 @@ def get_score_set_variants_as_csv(
402402
db: Session,
403403
score_set: ScoreSet,
404404
namespaces: List[Literal["scores", "counts"]],
405+
namespaced: Optional[bool] = None,
405406
start: Optional[int] = None,
406407
limit: Optional[int] = None,
407408
drop_na_columns: Optional[bool] = None,
@@ -419,6 +420,8 @@ def get_score_set_variants_as_csv(
419420
The score set to get the variants from.
420421
namespaces : List[Literal["scores", "counts"]]
421422
The namespaces of data to include. Currently only scores and counts are available; ClinVar and gnomAD will be added.
423+
namespaced: Optional[bool] = None
424+
Whether to namespace the columns or not.
422425
start : int, optional
423426
The index to start from. If None, starts from the beginning.
424427
limit : int, optional
@@ -493,10 +496,13 @@ def get_score_set_variants_as_csv(
493496
if limit:
494497
variants_query = variants_query.limit(limit)
495498
variants = db.scalars(variants_query).all()
496-
497-
rows_data = variants_to_csv_rows(variants, columns=namespaced_score_set_columns, mappings=mappings) # type: ignore
499+
rows_data = variants_to_csv_rows(variants, columns=namespaced_score_set_columns, namespaced=namespaced, mappings=mappings) # type: ignore
498500
rows_columns = [
499-
f"{namespace}.{col}" if namespace != "core" else col
501+
(
502+
f"{namespace}.{col}"
503+
if (namespaced and namespace not in ["core", "mavedb"])
504+
else (f"mavedb.{col}" if namespaced and namespace == "mavedb" else col)
505+
)
500506
for namespace, cols in namespaced_score_set_columns.items()
501507
for col in cols
502508
]
@@ -545,6 +551,7 @@ def variant_to_csv_row(
545551
variant: Variant,
546552
columns: dict[str, list[str]],
547553
mapping: Optional[MappedVariant] = None,
554+
namespaced: Optional[bool] = None,
548555
na_rep="NA",
549556
) -> dict[str, Any]:
550557
"""
@@ -556,6 +563,8 @@ def variant_to_csv_row(
556563
List of variants.
557564
columns : dict[str, list[str]]
558565
Columns to serialize.
566+
namespaced: Optional[bool] = None
567+
Whether to prefix each column key with its namespace (e.g. ``scores.``) or not.
559568
na_rep : str
560569
String to represent null values.
561570
@@ -594,22 +603,26 @@ def variant_to_csv_row(
594603
value = ""
595604
if is_null(value):
596605
value = na_rep
597-
row[f"mavedb.{column_key}"] = value
606+
key = f"mavedb.{column_key}" if namespaced else column_key
607+
row[key] = value
598608
for column_key in columns.get("scores", []):
599609
parent = variant.data.get("score_data") if variant.data else None
600610
value = str(parent.get(column_key)) if parent else na_rep
601-
row[f"scores.{column_key}"] = value
611+
key = f"scores.{column_key}" if namespaced else column_key
612+
row[key] = value
602613
for column_key in columns.get("counts", []):
603614
parent = variant.data.get("count_data") if variant.data else None
604615
value = str(parent.get(column_key)) if parent else na_rep
605-
row[f"counts.{column_key}"] = value
616+
key = f"counts.{column_key}" if namespaced else column_key
617+
row[key] = value
606618
return row
607619

608620

609621
def variants_to_csv_rows(
610622
variants: Sequence[Variant],
611623
columns: dict[str, list[str]],
612624
mappings: Optional[Sequence[Optional[MappedVariant]]] = None,
625+
namespaced: Optional[bool] = None,
613626
na_rep="NA",
614627
) -> Iterable[dict[str, Any]]:
615628
"""
@@ -621,6 +634,8 @@ def variants_to_csv_rows(
621634
List of variants.
622635
columns : dict[str, list[str]]
623636
Columns to serialize.
637+
namespaced: Optional[bool] = None
638+
Whether to prefix each column key with its namespace (e.g. ``scores.``) or not.
624639
na_rep : str
625640
String to represent null values.
626641
@@ -630,10 +645,10 @@ def variants_to_csv_rows(
630645
"""
631646
if mappings is not None:
632647
return map(
633-
lambda pair: variant_to_csv_row(pair[0], columns, mapping=pair[1], na_rep=na_rep),
648+
lambda pair: variant_to_csv_row(pair[0], columns, mapping=pair[1], namespaced=namespaced, na_rep=na_rep),
634649
zip(variants, mappings),
635650
)
636-
return map(lambda v: variant_to_csv_row(v, columns, na_rep=na_rep), variants)
651+
return map(lambda v: variant_to_csv_row(v, columns, namespaced=namespaced, na_rep=na_rep), variants)
637652

638653

639654
def find_meta_analyses_for_score_sets(db: Session, urns: list[str]) -> list[ScoreSet]:

src/mavedb/routers/score_sets.py

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,7 @@ def get_score_set_variants_csv(
319319
db,
320320
score_set,
321321
namespaces,
322+
True,
322323
start,
323324
limit,
324325
drop_na_columns,
@@ -379,11 +380,7 @@ def get_score_set_scores_csv(
379380

380381
assert_permission(user_data, score_set, Action.READ)
381382

382-
csv_str = get_score_set_variants_as_csv(db, score_set, ["scores"], start, limit, drop_na_columns)
383-
lines = csv_str.splitlines()
384-
if lines:
385-
header = lines[0].replace("scores.", "")
386-
csv_str = "\n".join([header] + lines[1:])
383+
csv_str = get_score_set_variants_as_csv(db, score_set, ["scores"], False, start, limit, drop_na_columns)
387384
return StreamingResponse(iter([csv_str]), media_type="text/csv")
388385

389386

@@ -438,11 +435,7 @@ async def get_score_set_counts_csv(
438435

439436
assert_permission(user_data, score_set, Action.READ)
440437

441-
csv_str = get_score_set_variants_as_csv(db, score_set, ["counts"], start, limit, drop_na_columns)
442-
lines = csv_str.splitlines()
443-
if lines:
444-
header = lines[0].replace("counts.", "")
445-
csv_str = "\n".join([header] + lines[1:])
438+
csv_str = get_score_set_variants_as_csv(db, score_set, ["counts"], False, start, limit, drop_na_columns)
446439
return StreamingResponse(iter([csv_str]), media_type="text/csv")
447440

448441

@@ -1256,20 +1249,20 @@ async def update_score_set(
12561249
assert item.dataset_columns is not None
12571250
score_columns = {
12581251
"core": ["hgvs_nt", "hgvs_splice", "hgvs_pro"],
1259-
"mavedb": item.dataset_columns["score_columns"],
1252+
"scores": item.dataset_columns["score_columns"],
12601253
}
12611254
count_columns = {
12621255
"core": ["hgvs_nt", "hgvs_splice", "hgvs_pro"],
1263-
"mavedb": item.dataset_columns["count_columns"],
1256+
"counts": item.dataset_columns["count_columns"],
12641257
}
12651258

12661259
scores_data = pd.DataFrame(
1267-
variants_to_csv_rows(item.variants, columns=score_columns)
1260+
variants_to_csv_rows(item.variants, columns=score_columns, namespaced=False)
12681261
).replace("NA", pd.NA)
12691262

12701263
if item.dataset_columns["count_columns"]:
12711264
count_data = pd.DataFrame(
1272-
variants_to_csv_rows(item.variants, columns=count_columns)
1265+
variants_to_csv_rows(item.variants, columns=count_columns, namespaced=False)
12731266
).replace("NA", pd.NA)
12741267
else:
12751268
count_data = None

0 commit comments

Comments
 (0)