Skip to content

Commit 30039c9

Browse files
committed
Batch Reductions Update:
- `homozygosify_glstring`: When creating a GL String, if one of the two typings (typ1/typ2) for a locus is not available, homozygosify by using the other typing for both.
- `suppress_reduced_locus_column`: When creating a GL String, you may not want the reduced locus columns in the output; this option drops them.
1 parent 2c570be commit 30039c9

File tree

5 files changed

+33
-7
lines changed

5 files changed

+33
-7
lines changed

extras/reduce_conf.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,8 @@
123123
"new_column_for_redux": true,
124124
"reduced_column_prefix": "reduced_",
125125
"generate_glstring": true,
126+
"homozygosify_glstring": true,
127+
"suppress_reduced_locus_column": true,
126128
"output_file_format": "csv",
127129
"apply_compression": "gzip",
128130
"verbose_log": true

extras/reduce_conf_glstring.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
"new_column_for_redux": true,
2828
"reduced_column_prefix": "reduced_",
2929
"generate_glstring": true,
30+
"homozygosify_glstring": true,
31+
"suppress_reduced_locus_column": true,
3032
"output_file_format": "csv",
3133
"apply_compression": "gzip",
3234
"verbose_log": true

extras/sample.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
rid,did,r_a_typ1,r_a_typ2,r_b_typ1,r_b_typ2,r_c_typ1,r_c_typ2,r_drb1_typ1,r_drb1_typ2,r_dpb1_typ1,r_dpb1_typ2,d_a_typ1,d_a_typ2,d_b_typ1,d_b_typ2,d_c_typ1,d_c_typ2,d_drb1_typ1,d_drb1_typ2,d_dpb1_typ1,d_dpb1_typ2,r_drb3_typ1,r_drb3_typ2,r_drb4_typ1,r_drb4_typ2,r_drb5_typ1,r_drb5_typ2,d_drb3_typ1,d_drb3_typ2,d_drb4_typ1,d_drb4_typ2,d_drb5_typ1,d_drb5_typ2
2-
2110,123,A*01:AB,A*29:79,B*18:67,B*51:275,C*05:01:19,C*02:85:02,DRB1*03:03,DRB1*14:144,DPB1*193:01:01,DPB1*582:01:01,A*01:AB,A*29:79,B*18:67,B*51:275,C*05:01:19,C*02:85:02,DRB1*03:03,DRB1*14:144,DPB1*193:01:01,DPB1*582:01:01,DRB3*02:189,DRB3*03:09,NNNN,NNNN,NNNN,NNNN,NNNN,NNNN,NNNN,NNNN,DRB5*01:93,DRB5*02:02:01
2+
2110,123,A*01:AB,,B*18:67,B*51:275,C*05:01:19,C*02:85:02,DRB1*03:03,DRB1*14:144,DPB1*193:01:01,DPB1*582:01:01,A*01:AB,A*29:79,B*18:67,B*51:275,C*05:01:19,C*02:85:02,DRB1*03:03,DRB1*14:144,DPB1*193:01:01,DPB1*582:01:01,DRB3*02:189,DRB3*03:09,NNNN,NNNN,NNNN,NNNN,NNNN,NNNN,NNNN,NNNN,DRB5*01:93,DRB5*02:02:01
33
2111,456,A*01:01:42,A*30:12:02,B*44:02:32,B*35:42,C*03:148,C*04:322,DRB1*13:01:16,DRB1*15:80N,DPB1*914:01:01,DPB1*278:01:01,A*01:01:42,A*30:12:02,B*44:02:32,B*35:42,C*03:148,C*04:322,DRB1*13:01:16,DRB1*15:80N,DPB1*914:01:01,DPB1*278:01:01,NNNN,NNNN,DRB4*01:53,DRB4*01:31,NNNN,NNNN,NNNN,NNNN,NNNN,NNNN,DRB5*01:102,DRB5*01:103
44
2113,789,A*02:247,A*03:227,B*15:570,B*07:02:01:17,C*16:01:10,C*06:102,DRB1*13:156,DRB1*14:167:01,DPB1*405:01:01,DPB1*479:01:01,A*02:247,A*03:227,B*15:570,B*07:02:01:17,C*16:01:10,C*06:102,DRB1*13:156,DRB1*14:167:01,DPB1*405:01:01,DPB1*479:01:01,NNNN,NNNN,DRB4*01:79,DRB4*01:119,NNNN,NNNN,DRB3*02:189,DRB3*03:09,NNNN,NNNN,NNNN,NNNN

extras/sample_glstring.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
rid,did,recip_gl,donor_gl
2-
123,456,A*02:GNF+A*03:XYZ^B*07:ABD+B*44:AWA,A*02:01:01+A*03:01:01^B*07:RVXR+B*44:XYAG
2+
123,456,A*02:GNF+A*03:AB^B*07:ABD+B*44:AWA,A*02:01:01+A*03:01:01^B*07:RVXR+B*44:XYAG
33
789,345,A*01:TUS+A*24:02:01G^B*08:ARGR+B*08:ARGS,A*02:01:01+A*01:PXTD^B*51:01:01G+B*40:BWUP

scripts/pyard-reduce-csv

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -231,22 +231,44 @@ def reduce_locus_columns(df, ard_config, locus_column_mapping, verbose):
231231
for locus in locus_column_mapping[subject]:
232232
slug_column = locus + "_slug"
233233
slug_columns.append(slug_column)
234-
if len(locus_column_mapping[subject][locus]) > 1:
235-
df[slug_column] = (
236-
df[locus_column_mapping[subject][locus][0]]
237-
+ "+"
238-
+ df[locus_column_mapping[subject][locus][1]]
234+
locus_typ_pair = locus_column_mapping[subject][locus]
235+
if len(locus_typ_pair) > 1:
236+
df[slug_column] = df[locus_typ_pair].apply(
237+
create_reduced_slug, axis=1
239238
)
240239
else:
241240
df[slug_column] = df[locus_column_mapping[subject][locus][0]]
242241

242+
if ard_config.get("suppress_reduced_locus_column"):
243+
df.drop(columns=locus_typ_pair, inplace=True)
244+
243245
df[subject + "_gl"] = df[slug_columns].agg("^".join, axis=1)
244246
df[subject + "_gl"] = df[subject + "_gl"].apply(
245247
lambda gl: gl.replace("^+", "")
246248
)
247249
df.drop(columns=slug_columns, inplace=True)
248250

249251

252+
def create_reduced_slug(locus_typ1_typ2_pair):
    """Build the GL String slug for one locus from its two typing values.

    Called row-wise through ``DataFrame.apply(..., axis=1)`` with the two
    typing columns of a single locus.

    Args:
        locus_typ1_typ2_pair: A pandas Series whose first two positional
            entries are the typ1 and typ2 values of the locus.

    Returns:
        * ``"typ1+typ2"`` when both typings are present.
        * ``""`` when neither typing is present.
        * When exactly one typing is present: that typing doubled
          (``"typ+typ"``) if the ``homozygosify_glstring`` option is set in
          the module-level ``ard_config``, otherwise the lone typing.
    """
    typ1 = _blank_if_missing(locus_typ1_typ2_pair.iloc[0])
    typ2 = _blank_if_missing(locus_typ1_typ2_pair.iloc[1])

    if typ1 and typ2:
        return typ1 + "+" + typ2

    # At most one typing is available from here on; typ1 wins if present,
    # but both can never be truthy at this point.
    available_typ = typ1 or typ2
    if not available_typ:
        return ""

    if ard_config.get("homozygosify_glstring"):
        return available_typ + "+" + available_typ
    return available_typ


def _blank_if_missing(typ):
    """Return ``typ`` if it is a string, otherwise ``""``.

    pandas represents an empty CSV cell as NaN by default, and NaN is truthy,
    so a plain truthiness check would let it through to string concatenation
    and raise ``TypeError``. Treating every non-string (None, NaN, NA) as an
    empty typing keeps the missing-value handling in one place.
    """
    return typ if isinstance(typ, str) else ""
270+
271+
250272
def reduce_glstring(glstring: str) -> str:
251273
try:
252274
return ard.redux(glstring, ard_config["redux_type"])

0 commit comments

Comments
 (0)