Skip to content

Commit 2574390

Browse files
committed
Fix generate_glstring to output genotype glstring
1 parent 086b40b commit 2574390

File tree

1 file changed: 42 additions and 27 deletions

scripts/pyard-reduce-csv

Lines changed: 42 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -110,7 +110,7 @@ def reduce(allele, locus, column_name):
110110
return allele
111111
if "*" in allele:
112112
locus_allele = allele
113-
elif ard_config["locus_in_allele_name"]:
113+
elif ard_config.get("locus_in_allele_name"):
114114
locus_allele = allele
115115
else:
116116
locus_allele = f"{locus}*{allele}"
@@ -129,7 +129,7 @@ def reduce(allele, locus, column_name):
129129
return allele
130130
# print(f"reduced to '{reduced_allele}'")
131131
if reduced_allele:
132-
if ard_config["keep_locus_in_allele_name"]:
132+
if ard_config.get("keep_locus_in_allele_name"):
133133
allele = reduced_allele
134134
else:
135135
allele = remove_locus_name(reduced_allele)
@@ -139,16 +139,16 @@ def reduce(allele, locus, column_name):
139139
if verbose:
140140
print(f"\t{locus_allele} => {allele}")
141141
else:
142-
if ard_config["convert_v2_to_v3"]:
142+
if ard_config.get("convert_v2_to_v3"):
143143
if ard.is_v2(locus_allele):
144144
v3_allele = ard.v2_to_v3(locus_allele)
145-
if not ard_config["keep_locus_in_allele_name"]:
145+
if not ard_config.get("keep_locus_in_allele_name"):
146146
allele = remove_locus_name(v3_allele)
147147
else:
148148
allele = v3_allele
149149
if verbose:
150150
print(f"\t{locus_allele} => {allele}")
151-
elif ard_config["keep_locus_in_allele_name"]:
151+
elif ard_config.get("keep_locus_in_allele_name"):
152152
allele = locus_allele
153153

154154
return allele
@@ -186,17 +186,29 @@ if __name__ == "__main__":
186186
dest="imgt_version",
187187
help="IPD-IMGT/HLA db to use for redux",
188188
)
189+
parser.add_argument(
190+
"-q",
191+
"--quiet",
192+
dest="quiet",
193+
action="store_true",
194+
default=False,
195+
help="Don't print verbose log",
196+
)
189197
args = parser.parse_args()
190198
config_filename = args.config
191199

192200
print("Using config file:", config_filename)
193201
with open(config_filename) as conf_file:
194202
ard_config = json.load(conf_file)
195203

196-
verbose = ard_config["verbose_log"]
204+
if not args.quiet:
205+
verbose = ard_config.get("verbose_log")
206+
else:
207+
verbose = False
208+
197209
white_space_regex = re.compile(r"\s+")
198210

199-
if ard_config["output_file_format"] == "xlsx":
211+
if ard_config.get("output_file_format") == "xlsx":
200212
try:
201213
import openpyxl
202214
except ImportError:
@@ -224,24 +236,20 @@ if __name__ == "__main__":
224236
keep_default_na=False,
225237
)
226238
except FileNotFoundError as e:
227-
print(f"File not found {ard_config['in_csv_filename']}", file=sys.stderr)
239+
print(f"File not found {ard_config.get('in_csv_filename')}", file=sys.stderr)
228240
sys.exit(1)
229241

230242
reduce_prefix = "reduced_"
231243
failed_to_reduce_alleles = []
232-
reduced_column_mappings = {}
233244
locus_column_mapping = ard_config["locus_column_mapping"]
234245
for subject in locus_column_mapping:
235-
reduced_column_mappings[subject] = {}
236246
for locus in locus_column_mapping[subject]:
237-
if locus not in reduced_column_mappings[subject]:
238-
reduced_column_mappings[subject][locus] = []
239247
# Reduce each of the specified columns
240248
locus_columns = locus_column_mapping[subject][locus]
241249
for column in locus_columns:
242250
if verbose:
243251
print(f"Column:{column} =>")
244-
if ard_config["new_column_for_redux"]:
252+
if ard_config.get("new_column_for_redux"):
245253
# insert a new column
246254
new_column_name = f"{reduce_prefix}{column}"
247255
new_column_index = df.columns.get_loc(column) + 1
@@ -251,17 +259,16 @@ if __name__ == "__main__":
251259
new_column_name,
252260
df[column].apply(clean_locus, locus=locus, column_name=column),
253261
)
254-
reduced_column_mappings[subject][locus].append(new_column_name)
262+
locus_columns[locus_columns.index(column)] = new_column_name
255263
else:
256264
# Apply clean_locus function to the column and replace the column
257265
df[column] = df[column].apply(
258266
clean_locus, locus=locus, column_name=column
259267
)
260-
reduced_column_mappings[subject][locus].append(column)
261268

262269
# Map DRB3,DRB4,DRB5 to DRBX if specified
263270
# New columns DRBX_1 and DRBX_2 are created
264-
if ard_config["map_drb345_to_drbx"]:
271+
if ard_config.get("map_drb345_to_drbx"):
265272
drbx_loci = ["DRB3", "DRB4", "DRB5"]
266273
drbx_columns = [
267274
col_name for col_name in df.columns if col_name.split("_")[1] in drbx_loci
@@ -273,18 +280,26 @@ if __name__ == "__main__":
273280
)
274281
df["DRBX_1"], df["DRBX_2"] = zip(*df_drbx)
275282

276-
if ard_config["generate_glstring"]:
277-
for subject in reduced_column_mappings:
278-
for haplotype_num in range(2):
279-
hap1_columns = list(
280-
map(
281-
lambda x: reduced_column_mappings[subject][x][haplotype_num],
282-
reduced_column_mappings[subject].keys(),
283+
if ard_config.get("generate_glstring"):
284+
for subject in locus_column_mapping:
285+
slug_columns = []
286+
for locus in locus_column_mapping[subject]:
287+
slug_column = locus + "_slug"
288+
slug_columns.append(slug_column)
289+
if len(locus_column_mapping[subject][locus]) > 1:
290+
df[slug_column] = (
291+
df[locus_column_mapping[subject][locus][0]]
292+
+ "+"
293+
+ df[locus_column_mapping[subject][locus][1]]
283294
)
284-
)
285-
df[subject + f"_haplotype_{(haplotype_num + 1)}"] = df[
286-
hap1_columns
287-
].agg("~".join, axis=1)
295+
else:
296+
df[slug_column] = df[locus_column_mapping[subject][locus][0]]
297+
298+
df[subject + "_gl"] = df[slug_columns].agg("^".join, axis=1)
299+
df[subject + "_gl"] = df[subject + "_gl"].apply(
300+
lambda gl: gl.replace("^+", "")
301+
)
302+
df.drop(columns=slug_columns, inplace=True)
288303

289304
# Save as XLSX if specified
290305
if ard_config["output_file_format"] == "xlsx":

0 commit comments

Comments
 (0)