Skip to content

Commit f362de1

Browse files
authored
Merge pull request #145 from pbashyal-nmdp/fix_sero_pandas_warning
Fix pandas warning for rel_dna_ser.txt
2 parents 2d1c96a + a2341e6 commit f362de1

File tree

1 file changed

+16
-3
lines changed

1 file changed

+16
-3
lines changed

pyard/data_repository.py

Lines changed: 16 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -387,15 +387,28 @@ def to_serological_name(locus_name: str):
387387

388388
def generate_serology_mapping(db_connection: sqlite3.Connection, imgt_version):
389389
if not db.table_exists(db_connection, 'serology_mapping'):
390-
# Load WMDA serology mapping data
390+
"""
391+
Read `rel_dna_ser.txt` file that contains alleles and their serological equivalents.
392+
393+
The fields of the Alleles->Serological mapping file are:
394+
Locus - HLA Locus
395+
Allele - HLA Allele Name
396+
USA - Unambiguous Serological Antigen associated with allele
397+
PSA - Possible Serological Antigen associated with allele
398+
ASA - Assumed Serological Antigen associated with allele
399+
EAE - Expert Assigned Exceptions in search determinants of some registries
400+
401+
EAE is ignored when generating the serology map.
402+
"""
391403
rel_dna_ser_url = f'{IMGT_HLA_URL}{imgt_version}/wmda/rel_dna_ser.txt'
404+
# Load WMDA serology mapping data from URL
392405
df_sero = pd.read_csv(rel_dna_ser_url, sep=';', skiprows=6,
393-
names=['Locus', 'Allele', 'USA', 'PSA', 'ASA'],
406+
names=['Locus', 'Allele', 'USA', 'PSA', 'ASA', 'EAE'],
394407
index_col=False)
395408

396409
# Remove 0 and ? from USA
397410
df_sero = df_sero[(df_sero['USA'] != '0') & (df_sero['USA'] != '?')]
398-
df_sero['Allele'] = df_sero['Locus'] + df_sero['Allele']
411+
df_sero['Allele'] = df_sero.loc[:, 'Locus'] + df_sero.loc[:, 'Allele']
399412

400413
usa = df_sero[['Locus', 'Allele', 'USA']].dropna()
401414
usa['Sero'] = usa['Locus'] + usa['USA']

0 commit comments

Comments
 (0)