Skip to content

Commit f392a0d

Browse files
committed
Merge branch 'littlep' of https://github.com/mmaiers-nmdp/py-ard into mmaiers-nmdp-littlep
2 parents b7fa29c + fd69390 commit f392a0d

File tree

5 files changed

+49
-24
lines changed

5 files changed

+49
-24
lines changed

pyard/data_repository.py

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version):
8181

8282
df['2d'] = df['A'].apply(get_2field_allele)
8383
df['3d'] = df['A'].apply(get_3field_allele)
84+
df['lg'] = df['G'].apply(lambda a: ":".join(a.split(":")[0:2]) + "g")
85+
df['lgx'] = df['G'].apply(lambda a: ":".join(a.split(":")[0:2]))
8486

8587
mg = df.drop_duplicates(['2d', 'G'])['2d'].value_counts()
8688
multiple_g_list = mg[mg > 1].reset_index()['index'].to_list()
@@ -90,8 +92,23 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version):
9092
.groupby('2d', as_index=True).agg("/".join) \
9193
.to_dict()['G']
9294

93-
df['lg'] = df['G'].apply(lambda a: ":".join(a.split(":")[0:2]) + "g")
94-
df['lgx'] = df['G'].apply(lambda a: ":".join(a.split(":")[0:2]))
95+
mlg = df.drop_duplicates(['2d', 'lg'])['2d'].value_counts()
96+
multiple_lg_list = mlg[mlg > 1].reset_index()['index'].to_list()
97+
98+
dup_lg = df[df['2d'].isin(multiple_lg_list)][['lg', '2d']] \
99+
.drop_duplicates() \
100+
.groupby('2d', as_index=True).agg("/".join) \
101+
.to_dict()['lg']
102+
103+
mlgx = df.drop_duplicates(['2d', 'lgx'])['2d'].value_counts()
104+
multiple_lgx_list = mlgx[mlgx > 1].reset_index()['index'].to_list()
105+
106+
dup_lgx = df[df['2d'].isin(multiple_lgx_list)][['lgx', '2d']] \
107+
.drop_duplicates() \
108+
.groupby('2d', as_index=True).agg("/".join) \
109+
.to_dict()['lgx']
110+
111+
95112

96113
# Creating dictionaries with mac_code->ARS group mapping
97114
df_g = pd.concat([
@@ -116,11 +133,13 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version):
116133
lgx_group = df_lgx.set_index('A')['lgx'].to_dict()
117134

118135
db.save_dict(db_connection, table_name='dup_g', dictionary=dup_g, columns=('allele', 'g_group'))
136+
db.save_dict(db_connection, table_name='dup_lg', dictionary=dup_lg, columns=('allele', 'lg_group'))
137+
db.save_dict(db_connection, table_name='dup_lgx', dictionary=dup_lgx, columns=('allele', 'lgx_group'))
119138
db.save_dict(db_connection, table_name='g_group', dictionary=g_group, columns=('allele', 'g'))
120139
db.save_dict(db_connection, table_name='lg_group', dictionary=lg_group, columns=('allele', 'lg'))
121140
db.save_dict(db_connection, table_name='lgx_group', dictionary=lgx_group, columns=('allele', 'lgx'))
122141

123-
return dup_g, g_group, lg_group, lgx_group
142+
return dup_g, dup_lg, dup_lgx, g_group, lg_group, lgx_group
124143

125144

126145
def generate_alleles_and_xx_codes(db_connection: sqlite3.Connection, imgt_version):

pyard/pyard.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def __init__(self, imgt_version: str = 'Latest',
6363
# Load Alleles and XX Codes
6464
self.valid_alleles, self.xx_codes = generate_alleles_and_xx_codes(self.db_connection, imgt_version)
6565
# Load ARS mappings
66-
self.dup_g, self._G, self._lg, self._lgx = generate_ars_mapping(self.db_connection, imgt_version)
66+
self.dup_g, self.dup_lg, self.dup_lgx, self._G, self._lg, self._lgx = generate_ars_mapping(self.db_connection, imgt_version)
6767
# Load Serology mappings
6868
generate_serology_mapping(self.db_connection, imgt_version)
6969
# Load V2 to V3 mappings
@@ -117,14 +117,18 @@ def redux(self, allele: str, ars_type: str) -> str:
117117
else:
118118
return self._G[allele]
119119
elif ars_type == "lg":
120-
if allele in self._lg:
120+
if allele in self.dup_lg:
121+
return self.dup_lg[allele]
122+
elif allele in self._lg:
121123
return self._lg[allele]
122124
else:
123125
# for 'lg' when allele is not in G group,
124126
# return allele with only the first 2 fields
125127
return ':'.join(allele.split(':')[0:2]) + 'g'
126128
elif ars_type == "lgx":
127-
if allele in self._lgx:
129+
if allele in self.dup_lgx:
130+
return self.dup_lgx[allele]
131+
elif allele in self._lgx:
128132
return self._lgx[allele]
129133
else:
130134
# for 'lgx' when allele is not in G group,

tests/environment.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22

33

44
def before_all(context):
5-
context.ard = ARD('3290', data_dir='/tmp/py-ard')
5+
context.ard = ARD('3440', data_dir='/tmp/py-ard')

tests/features/allele.feature

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,19 @@ Feature: Alleles
77
Then the reduced allele is found to be <Redux Allele>
88

99
Examples:
10-
| Allele | Level | Redux Allele |
11-
| A*01:01:01 | G | A*01:01:01G |
12-
| A*01:01:01 | lg | A*01:01g |
13-
| A*01:01:01 | lgx | A*01:01 |
10+
| Allele | Level | Redux Allele |
11+
| A*01:01:01 | G | A*01:01:01G |
12+
| A*01:01:01 | lg | A*01:01g |
13+
| A*01:01:01 | lgx | A*01:01 |
1414

15-
| HLA-A*01:01:01 | G | HLA-A*01:01:01G |
16-
| HLA-A*01:01:01 | lg | HLA-A*01:01g |
17-
| HLA-A*01:01:01 | lgx | HLA-A*01:01 |
15+
| HLA-A*01:01:01 | G | HLA-A*01:01:01G |
16+
| HLA-A*01:01:01 | lg | HLA-A*01:01g |
17+
| HLA-A*01:01:01 | lgx | HLA-A*01:01 |
1818

19-
| DRB1*14:05:01 | lgx | DRB1*14:05 |
20-
| DRB1*14:05:01 | lg | DRB1*14:05g |
19+
| DRB1*14:05:01 | lgx | DRB1*14:05 |
20+
| DRB1*14:05:01 | lg | DRB1*14:05g |
2121

22-
| DRB1*14:06:01 | lgx | DRB1*14:06 |
23-
| DRB1*14:06:01 | lg | DRB1*14:06g |
22+
| DRB1*14:06:01 | lgx | DRB1*14:06 |
23+
| DRB1*14:06:01 | lg | DRB1*14:06g |
24+
| C*02:02 | lg | C*02:02g/C*02:10g |
25+
| C*02:02 | lgx | C*02:02/C*02:10 |

0 commit comments

Comments
 (0)