Skip to content

Commit a2797c5

Browse files
committed
Retrieve DRB columns from locus_column_mapping rather than guessing column names.
1 parent f577bd9 commit a2797c5

File tree

3 files changed

+62
-27
lines changed

3 files changed

+62
-27
lines changed

extras/reduce_conf.json

Lines changed: 43 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,12 @@
1414
"r_drb1_typ2",
1515
"r_dpb1_typ1",
1616
"r_dpb1_typ2",
17+
"r_drb3_typ1",
18+
"r_drb3_typ2",
19+
"r_drb4_typ1",
20+
"r_drb4_typ2",
21+
"r_drb5_typ1",
22+
"r_drb5_typ2",
1723
"d_a_typ1",
1824
"d_a_typ2",
1925
"d_b_typ1",
@@ -23,7 +29,13 @@
2329
"d_drb1_typ1",
2430
"d_drb1_typ2",
2531
"d_dpb1_typ1",
26-
"d_dpb1_typ2"
32+
"d_dpb1_typ2",
33+
"d_drb3_typ1",
34+
"d_drb3_typ2",
35+
"d_drb4_typ1",
36+
"d_drb4_typ2",
37+
"d_drb5_typ1",
38+
"d_drb5_typ2"
2739
],
2840
"locus_column_mapping": {
2941
"recipient": {
@@ -39,13 +51,25 @@
3951
"r_c_typ1",
4052
"r_c_typ2"
4153
],
54+
"dqb1": [
55+
"r_dpb1_typ1",
56+
"r_dpb1_typ2"
57+
],
4258
"drb1": [
4359
"r_drb1_typ1",
4460
"r_drb1_typ2"
4561
],
46-
"dqb1": [
47-
"r_dpb1_typ1",
48-
"r_dpb1_typ2"
62+
"drb3": [
63+
"r_drb3_typ1",
64+
"r_drb3_typ2"
65+
],
66+
"drb4": [
67+
"r_drb4_typ1",
68+
"r_drb4_typ2"
69+
],
70+
"drb5": [
71+
"r_drb5_typ1",
72+
"r_drb5_typ2"
4973
]
5074
},
5175
"donor": {
@@ -61,20 +85,30 @@
6185
"d_c_typ1",
6286
"d_c_typ2"
6387
],
88+
"dqb1": [
89+
"d_dpb1_typ1",
90+
"d_dpb1_typ2"
91+
],
6492
"drb1": [
6593
"d_drb1_typ1",
6694
"d_drb1_typ2"
6795
],
68-
"dqb1": [
69-
"d_dpb1_typ1",
70-
"d_dpb1_typ2"
96+
"drb3": [
97+
"d_drb3_typ1",
98+
"d_drb3_typ2"
99+
],
100+
"drb4": [
101+
"d_drb4_typ1",
102+
"d_drb4_typ2"
103+
],
104+
"drb5": [
105+
"d_drb5_typ1",
106+
"d_drb5_typ2"
71107
]
72108
}
73109
},
74-
75110
"redux_type": "lgx",
76111
"redux_cache_size": 1000,
77-
78112
"reduce_serology": false,
79113
"reduce_v2": true,
80114
"convert_v2_to_v3": false,
@@ -84,17 +118,12 @@
84118
"reduce_XX": false,
85119
"reduce_MAC": true,
86120
"map_drb345_to_drbx": false,
87-
88121
"locus_in_allele_name": true,
89122
"keep_locus_in_allele_name": true,
90-
91123
"new_column_for_redux": true,
92124
"reduced_column_prefix": "reduced_",
93-
94125
"generate_glstring": true,
95-
96126
"output_file_format": "csv",
97127
"apply_compression": "gzip",
98-
99128
"verbose_log": true
100129
}

extras/sample.csv

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
rid,did,r_a_typ1,r_a_typ2,r_b_typ1,r_b_typ2,r_c_typ1,r_c_typ2,r_drb1_typ1,r_drb1_typ2,r_dpb1_typ1,r_dpb1_typ2,d_a_typ1,d_a_typ2,d_b_typ1,d_b_typ2,d_c_typ1,d_c_typ2,d_drb1_typ1,d_drb1_typ2,d_dpb1_typ1,d_dpb1_typ2
2-
2110,123,A*01:AB,A*29:79,B*18:67,B*51:275,C*05:01:19,C*02:85:02,DRB1*03:03,DRB1*14:144,DPB1*193:01:01,DPB1*582:01:01,A*01:AB,A*29:79,B*18:67,B*51:275,C*05:01:19,C*02:85:02,DRB1*03:03,DRB1*14:144,DPB1*193:01:01,DPB1*582:01:01
3-
2111,456,A*01:01:42,A*30:12:02,B*44:02:32,B*35:42,C*03:148,C*04:322,DRB1*13:01:16,DRB1*15:80N,DPB1*914:01:01,DPB1*278:01:01,A*01:01:42,A*30:12:02,B*44:02:32,B*35:42,C*03:148,C*04:322,DRB1*13:01:16,DRB1*15:80N,DPB1*914:01:01,DPB1*278:01:01
4-
2113,789,A*02:247,A*03:227,B*15:570,B*07:02:01:17,C*16:01:10,C*06:102,DRB1*13:156,DRB1*14:167:01,DPB1*405:01:01,DPB1*479:01:01,A*02:247,A*03:227,B*15:570,B*07:02:01:17,C*16:01:10,C*06:102,DRB1*13:156,DRB1*14:167:01,DPB1*405:01:01,DPB1*479:01:01
1+
rid,did,r_a_typ1,r_a_typ2,r_b_typ1,r_b_typ2,r_c_typ1,r_c_typ2,r_drb1_typ1,r_drb1_typ2,r_dpb1_typ1,r_dpb1_typ2,d_a_typ1,d_a_typ2,d_b_typ1,d_b_typ2,d_c_typ1,d_c_typ2,d_drb1_typ1,d_drb1_typ2,d_dpb1_typ1,d_dpb1_typ2,r_drb3_typ1,r_drb3_typ2,r_drb4_typ1,r_drb4_typ2,r_drb5_typ1,r_drb5_typ2,d_drb3_typ1,d_drb3_typ2,d_drb4_typ1,d_drb4_typ2,d_drb5_typ1,d_drb5_typ2
2+
2110,123,A*01:AB,A*29:79,B*18:67,B*51:275,C*05:01:19,C*02:85:02,DRB1*03:03,DRB1*14:144,DPB1*193:01:01,DPB1*582:01:01,A*01:AB,A*29:79,B*18:67,B*51:275,C*05:01:19,C*02:85:02,DRB1*03:03,DRB1*14:144,DPB1*193:01:01,DPB1*582:01:01,DRB3*02:189,DRB3*03:09,NNNN,NNNN,NNNN,NNNN,NNNN,NNNN,NNNN,NNNN,DRB5*01:93,DRB5*02:02:01
3+
2111,456,A*01:01:42,A*30:12:02,B*44:02:32,B*35:42,C*03:148,C*04:322,DRB1*13:01:16,DRB1*15:80N,DPB1*914:01:01,DPB1*278:01:01,A*01:01:42,A*30:12:02,B*44:02:32,B*35:42,C*03:148,C*04:322,DRB1*13:01:16,DRB1*15:80N,DPB1*914:01:01,DPB1*278:01:01,NNNN,NNNN,DRB4*01:53,DRB4*01:31,NNNN,NNNN,NNNN,NNNN,NNNN,NNNN,DRB5*01:102,DRB5*01:103
4+
2113,789,A*02:247,A*03:227,B*15:570,B*07:02:01:17,C*16:01:10,C*06:102,DRB1*13:156,DRB1*14:167:01,DPB1*405:01:01,DPB1*479:01:01,A*02:247,A*03:227,B*15:570,B*07:02:01:17,C*16:01:10,C*06:102,DRB1*13:156,DRB1*14:167:01,DPB1*405:01:01,DPB1*479:01:01,NNNN,NNNN,DRB4*01:79,DRB4*01:119,NNNN,NNNN,DRB3*02:189,DRB3*03:09,NNNN,NNNN,NNNN,NNNN

scripts/pyard-reduce-csv

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -209,15 +209,21 @@ def reduce_locus_columns(df, ard_config, locus_column_mapping, verbose):
209209
# New columns <subject>_DRBX_1 and <subject>_DRBX_2 are created for each subject
210210
if ard_config.get("map_drb345_to_drbx"):
211211
drbx_loci = ["DRB3", "DRB4", "DRB5"]
212-
drbx_columns = [
213-
col_name for col_name in df.columns if col_name.split("_")[1] in drbx_loci
214-
]
215-
if len(drbx_columns) == len(drbx_loci) * 2: # For Type1/Type2
216-
locus_in_allele_name = ard_config["keep_locus_in_allele_name"]
217-
df_drbx = df[drbx_columns].apply(
218-
create_drbx, axis=1, args=(locus_in_allele_name,)
219-
)
220-
df["DRBX_1"], df["DRBX_2"] = zip(*df_drbx)
212+
for subject in ard_config["locus_column_mapping"].keys():
213+
subject_loci = ard_config["locus_column_mapping"][subject]
214+
subject_drbs = []
215+
for locus in ard_config["locus_column_mapping"][subject].keys():
216+
if locus.upper() in drbx_loci:
217+
subject_drbs.extend(subject_loci[locus])
218+
219+
# Only proceed when all six DRB3/DRB4/DRB5 columns (two per locus) are mapped for this subject
220+
# e.g. ['r_drb3_typ1', 'r_drb3_typ2', 'r_drb4_typ1', 'r_drb4_typ2', 'r_drb5_typ1', 'r_drb5_typ2']
221+
if len(subject_drbs) == 6:
222+
locus_in_allele_name = ard_config["keep_locus_in_allele_name"]
223+
df_drbx = df[subject_drbs].apply(
224+
create_drbx, axis=1, args=(locus_in_allele_name,)
225+
)
226+
df[f"{subject}_DRBX_1"], df[f"{subject}_DRBX_2"] = zip(*df_drbx)
221227

222228
if ard_config.get("generate_glstring"):
223229
for subject in locus_column_mapping:

0 commit comments

Comments
 (0)