Skip to content

Commit 00d6b10

Browse files
authored
Merge pull request #98 from pbashyal-nmdp/fix_excel_created_problems1
Handle cases when there is no typing and when redux fails.
2 parents cb5a0ab + c02fce6 commit 00d6b10

File tree

4 files changed

+83
-65
lines changed

4 files changed

+83
-65
lines changed

pyard/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,4 @@
2424
from .pyard import ARD
2525

2626
__author__ = """NMDP Bioinformatics"""
27-
__version__ = '0.6.5'
27+
__version__ = '0.6.6'

scripts/pyard-reduce-csv

File mode changed: 100644 → 100755
Lines changed: 80 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -56,19 +56,6 @@ def is_P(allele: str) -> bool:
5656
return False
5757

5858

59-
def clean_locus(allele: str, column_name: str = 'Unknown') -> str:
60-
if allele:
61-
# Remove all white spaces
62-
allele = white_space_regex.sub('', allele)
63-
locus = column_name.split('_')[1].upper()
64-
# If the allele comes in as an allele list, apply reduce to all alleles
65-
if '/' in allele:
66-
return "/".join(map(reduce, allele.split('/'), locus))
67-
else:
68-
return reduce(allele, locus)
69-
return allele
70-
71-
7259
def should_be_reduced(allele, locus_allele):
7360
if is_serology(allele):
7461
return ard_config["reduce_serology"]
@@ -96,8 +83,10 @@ def should_be_reduced(allele, locus_allele):
9683
return False
9784

9885

99-
def reduce(allele, locus):
86+
def reduce(allele, locus, column_name):
10087
# Does the allele name have the locus in it ?
88+
if allele == '':
89+
return allele
10190
if '*' in allele:
10291
locus_allele = allele
10392
elif ard_config["locus_in_allele_name"]:
@@ -108,7 +97,15 @@ def reduce(allele, locus):
10897
# Check the config if this allele should be reduced
10998
if should_be_reduced(allele, locus_allele):
11099
# print(f"reducing '{locus_allele}'")
111-
reduced_allele = ard.redux_gl(locus_allele, ard_config["redux_type"])
100+
try:
101+
reduced_allele = ard.redux_gl(locus_allele, ard_config["redux_type"])
102+
except RuntimeError as e:
103+
if verbose:
104+
print(e)
105+
message = f"Failed reducing '{locus_allele}' in column {column_name}"
106+
print(message)
107+
failure_summary_messages.append(message)
108+
return allele
112109
# print(f"reduced to '{reduced_allele}'")
113110
if reduced_allele:
114111
if ard_config["keep_locus_in_allele_name"]:
@@ -129,6 +126,19 @@ def reduce(allele, locus):
129126
return allele
130127

131128

129+
def clean_locus(allele: str, column_name: str = 'Unknown') -> str:
130+
if allele:
131+
# Remove all white spaces
132+
allele = white_space_regex.sub('', allele)
133+
locus = column_name.split('_')[1].upper()
134+
# If the allele comes in as an allele list, apply reduce to all alleles
135+
if '/' in allele:
136+
return "/".join(map(reduce, allele.split('/'), locus, column_name))
137+
else:
138+
return reduce(allele, locus, column_name)
139+
return allele
140+
141+
132142
def create_drbx(row, locus_in_allele_name):
133143
return drbx.map_drbx(row.values, locus_in_allele_name)
134144

@@ -159,51 +169,59 @@ if __name__ == '__main__':
159169
print(" pip install openpyxl")
160170
sys.exit(1)
161171

162-
# Instantiate py-ard object with the latest
163-
ard = pyard.ARD(remove_invalid=False)
164-
165-
# Read the Input File
166-
# Read only the columns to be saved.
167-
# Header is the first row
168-
# Don't convert to NAs
169-
df = pd.read_csv(ard_config["in_csv_filename"],
170-
usecols=ard_config["columns_from_csv"],
171-
header=0, dtype=str,
172-
keep_default_na=False)
173-
174-
# Reduce each of the specified columns
175-
for column in ard_config["columns_to_reduce_in_csv"]:
176-
if verbose:
177-
print(f"Column:{column} =>")
178-
if ard_config["new_column_for_redux"]:
179-
# insert a new column
180-
new_column_name = f"reduced_{column}"
181-
new_column_index = df.columns.get_loc(column) + 1
182-
# Apply clean_locus function to the column and insert as a new column
183-
df.insert(new_column_index, new_column_name,
184-
df[column].apply(clean_locus, column_name=column))
172+
# Instantiate py-ard object with the latest
173+
ard = pyard.ARD(remove_invalid=False)
174+
175+
# Read the Input File
176+
# Read only the columns to be saved.
177+
# Header is the first row
178+
# Don't convert to NAs
179+
df = pd.read_csv(ard_config["in_csv_filename"],
180+
usecols=ard_config["columns_from_csv"],
181+
header=0, dtype=str,
182+
keep_default_na=False)
183+
184+
failure_summary_messages = []
185+
# Reduce each of the specified columns
186+
for column in ard_config["columns_to_reduce_in_csv"]:
187+
if verbose:
188+
print(f"Column:{column} =>")
189+
if ard_config["new_column_for_redux"]:
190+
# insert a new column
191+
new_column_name = f"reduced_{column}"
192+
new_column_index = df.columns.get_loc(column) + 1
193+
# Apply clean_locus function to the column and insert as a new column
194+
df.insert(new_column_index, new_column_name,
195+
df[column].apply(clean_locus, column_name=column))
196+
else:
197+
# Apply clean_locus function to the column and replace the column
198+
df[column] = df[column].apply(clean_locus, column_name=column)
199+
200+
# Map DRB3,DRB4,DRB5 to DRBX if specified
201+
# New columns DRBX_1 and DRBX_2 are created
202+
if ard_config['map_drb345_to_drbx']:
203+
drbx_loci = ['DRB3', 'DRB4', 'DRB5']
204+
drbx_columns = [col_name for col_name in df.columns if col_name.split('_')[1] in drbx_loci]
205+
if len(drbx_columns) == len(drbx_loci) * 2: # For Type1/Type2
206+
locus_in_allele_name = ard_config["keep_locus_in_allele_name"]
207+
df_drbx = df[drbx_columns].apply(create_drbx, axis=1, args=(locus_in_allele_name,))
208+
df['DRBX_1'], df['DRBX_2'] = zip(*df_drbx)
209+
210+
# Save as XLSX if specified
211+
if ard_config["output_file_format"] == 'xlsx':
212+
out_file_name = f"{ard_config['out_csv_filename']}.xlsx"
213+
df.to_excel(out_file_name, index=False)
214+
else:
215+
# Save as compressed CSV
216+
out_file_name = f"{ard_config['out_csv_filename'] + '.gz' if ard_config['apply_compression'] else ''}"
217+
df.to_csv(out_file_name, index=False, compression=ard_config["apply_compression"])
218+
219+
if len(failure_summary_messages) == 0:
220+
print("No Errors")
185221
else:
186-
# Apply clean_locus function to the column and replace the column
187-
df[column] = df[column].apply(clean_locus, column_name=column)
188-
189-
# Map DRB3,DRB4,DRB5 to DRBX if specified
190-
# New columns DRBX_1 and DRBX_2 are created
191-
if ard_config['map_drb345_to_drbx']:
192-
drbx_loci = ['DRB3', 'DRB4', 'DRB5']
193-
drbx_columns = [col_name for col_name in df.columns if col_name.split('_')[1] in drbx_loci]
194-
if len(drbx_columns) == len(drbx_loci) * 2: # For Type1/Type2
195-
locus_in_allele_name = ard_config["keep_locus_in_allele_name"]
196-
df_drbx = df[drbx_columns].apply(create_drbx, axis=1, args=(locus_in_allele_name,))
197-
df['DRBX_1'], df['DRBX_2'] = zip(*df_drbx)
198-
199-
# Save as XLSX if specified
200-
if ard_config["output_file_format"] == 'xlsx':
201-
out_file_name = f"{ard_config['out_csv_filename']}.xlsx"
202-
df.to_excel(out_file_name, index=False)
203-
else:
204-
# Save as compressed CSV
205-
out_file_name = f"{ard_config['out_csv_filename'] + '.gz' if ard_config['apply_compression'] else ''}"
206-
df.to_csv(out_file_name, index=False, compression=ard_config["apply_compression"])
207-
208-
# Done
209-
print(f"Saved result to file:{out_file_name}")
222+
print("Summary")
223+
print("-------")
224+
for message in failure_summary_messages:
225+
print("\t", message)
226+
# Done
227+
print(f"Saved result to file:{out_file_name}")

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[bumpversion]
2-
current_version = 0.6.5
2+
current_version = 0.6.6
33
commit = True
44
tag = True
55

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242

4343
setup(
4444
name='py-ard',
45-
version='0.6.5',
45+
version='0.6.6',
4646
description="ARD reduction for HLA with Python",
4747
long_description=readme + '\n\n' + history,
4848
author="CIBMTR",

0 commit comments

Comments (0)