Skip to content

Commit 8816ad4

Browse files
authored
Merge pull request #216 from pbashyal-nmdp/p_group_redux
P group redux
2 parents 6e874ad + 37252f7 commit 8816ad4

File tree

17 files changed

+124
-98
lines changed

17 files changed

+124
-98
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -126,6 +126,7 @@ ard.redux('B14', 'lg')
126126
| Reduction Type | Description |
127127
|----------------|-------------------------------------------------|
128128
| `G` | Reduce to G Group Level |
129+
| `P` | Reduce to P Group Level |
129130
| `lg` | Reduce to 2 field ARD level (append `g`) |
130131
| `lgx` | Reduce to 2 field ARD level |
131132
| `W` | Reduce/Expand to 3 field WHO nomenclature level |

api-spec.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@ openapi: 3.0.3
22
info:
33
title: ARD Reduction
44
description: Reduce to ARD Level
5-
version: "1.0.0rc1"
5+
version: "1.0.0rc2"
66
servers:
77
- url: 'http://localhost:8080'
88
tags:

pyard/__init__.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
# -*- coding: utf-8 -*-
2-
from functools import lru_cache
32

43
#
54
# pyard pyARD.
@@ -24,11 +23,11 @@
2423
#
2524
from .blender import blender as dr_blender
2625
from .broad_splits import find_splits as find_broad_splits
26+
from .constants import DEFAULT_CACHE_SIZE
2727
from .misc import get_imgt_db_versions as db_versions
28-
from .misc import DEFAULT_CACHE_SIZE
2928

3029
__author__ = """NMDP Bioinformatics"""
31-
__version__ = "1.0.0rc1"
30+
__version__ = "1.0.0rc2"
3231

3332

3433
def init(

pyard/ard.py

Lines changed: 13 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -22,24 +22,26 @@
2222
# > http://www.opensource.org/licenses/lgpl-license.php
2323
#
2424
import functools
25-
import sys
2625
import re
26+
import sys
2727
from typing import Iterable, List
2828

29-
from . import db
30-
from . import data_repository as dr
3129
from . import broad_splits
32-
from .smart_sort import smart_sort_comparator
30+
from . import data_repository as dr
31+
from . import db
3332
from .exceptions import InvalidAlleleError, InvalidMACError, InvalidTypingError
3433
from .misc import (
3534
get_n_field_allele,
3635
get_2field_allele,
37-
expression_chars,
38-
DEFAULT_CACHE_SIZE,
36+
validate_reduction_type,
37+
)
38+
from .constants import (
3939
HLA_regex,
4040
VALID_REDUCTION_TYPES,
41-
validate_reduction_type,
41+
expression_chars,
42+
DEFAULT_CACHE_SIZE,
4243
)
44+
from .smart_sort import smart_sort_comparator
4345

4446
default_config = {
4547
"reduce_serology": True,
@@ -90,9 +92,7 @@ def __init__(
9092
self.db_connection, _ = db.create_db_connection(data_dir, imgt_version)
9193

9294
# Load ARS mappings
93-
self.ars_mappings, p_group = dr.generate_ars_mapping(
94-
self.db_connection, imgt_version
95-
)
95+
self.ars_mappings = dr.generate_ars_mapping(self.db_connection, imgt_version)
9696
# Load Alleles and XX Codes
9797
(
9898
self.valid_alleles,
@@ -101,7 +101,7 @@ def __init__(
101101
self.who_group,
102102
self.exp_alleles,
103103
) = dr.generate_alleles_and_xx_codes_and_who(
104-
self.db_connection, imgt_version, self.ars_mappings, p_group
104+
self.db_connection, imgt_version, self.ars_mappings
105105
)
106106

107107
# Generate short nulls from WHO mapping
@@ -194,6 +194,8 @@ def _redux_allele(
194194
return self.ars_mappings.dup_g[allele]
195195
else:
196196
return self.ars_mappings.g_group[allele]
197+
elif redux_type == "P" and allele in self.ars_mappings.p_group:
198+
return self.ars_mappings.p_group[allele]
197199
elif redux_type in ["lgx", "lg"]:
198200
if allele in self.ars_mappings.dup_lgx:
199201
redux_allele = self.ars_mappings.dup_lgx[allele]

pyard/constants.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,10 @@
1+
import re
2+
3+
DEFAULT_CACHE_SIZE = 1_000
4+
5+
HLA_regex = re.compile("^HLA-")
6+
7+
VALID_REDUCTION_TYPES = ["G", "P", "lg", "lgx", "W", "exon", "U2"]
8+
expression_chars = ["N", "Q", "L", "S"]
9+
# List of P and G characters
10+
PandG_chars = ["P", "G"]

pyard/data_repository.py

Lines changed: 15 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,6 @@
2323
import copy
2424
import functools
2525
import sqlite3
26-
from collections import namedtuple
2726

2827
import pandas as pd
2928

@@ -37,31 +36,15 @@
3736
load_serology_mappings,
3837
load_latest_version,
3938
)
40-
from .misc import expression_chars
39+
from .constants import expression_chars
40+
from .mappings import ars_mapping_tables, ARSMapping, code_mapping_tables
4141
from .misc import (
4242
get_2field_allele,
4343
get_3field_allele,
4444
number_of_fields,
4545
get_1field_allele,
4646
)
4747

48-
ars_mapping_tables = [
49-
"dup_g",
50-
"dup_lgx",
51-
"g_group",
52-
"lgx_group",
53-
"exon_group",
54-
"p_not_g",
55-
]
56-
ARSMapping = namedtuple("ARSMapping", ars_mapping_tables)
57-
58-
code_mapping_tables = [
59-
"alleles",
60-
"xx_codes",
61-
"who_alleles",
62-
"who_group",
63-
]
64-
6548

6649
def expression_reduce(df):
6750
"""
@@ -162,14 +145,22 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version):
162145
)
163146
exon_group = df_exon.set_index("A")["exon"].to_dict()
164147

165-
# save
166-
return db.save_ars_mappings(
167-
db_connection, dup_g, dup_lgx, exon_group, g_group, lgx_group, p_group, p_not_g
148+
ars_mapping = ARSMapping(
149+
dup_g=dup_g,
150+
dup_lgx=dup_lgx,
151+
g_group=g_group,
152+
p_group=p_group,
153+
lgx_group=lgx_group,
154+
exon_group=exon_group,
155+
p_not_g=p_not_g,
168156
)
157+
db.save_ars_mappings(db_connection, ars_mapping)
158+
159+
return ars_mapping
169160

170161

171162
def generate_alleles_and_xx_codes_and_who(
172-
db_connection: sqlite3.Connection, imgt_version, ars_mappings, p_group
163+
db_connection: sqlite3.Connection, imgt_version, ars_mappings
173164
):
174165
if db.tables_exist(db_connection, code_mapping_tables):
175166
return db.load_code_mappings(db_connection)
@@ -232,7 +223,7 @@ def generate_alleles_and_xx_codes_and_who(
232223
allele_df[["Allele", "2d"]].rename(columns={"2d": "nd"}),
233224
allele_df[["Allele", "3d"]].rename(columns={"3d": "nd"}),
234225
pd.DataFrame(ars_mappings.g_group.items(), columns=["Allele", "nd"]),
235-
pd.DataFrame(p_group.items(), columns=["Allele", "nd"]),
226+
pd.DataFrame(ars_mappings.p_group.items(), columns=["Allele", "nd"]),
236227
],
237228
ignore_index=True,
238229
)

pyard/db.py

Lines changed: 26 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@
2424
import sqlite3
2525
from typing import Tuple, Dict, Set, List
2626

27-
from .data_repository import ARSMapping
27+
from .mappings import ARSMapping
2828
from .misc import get_imgt_db_versions, get_default_db_directory
2929

3030

@@ -374,73 +374,68 @@ def load_ars_mappings(db_connection):
374374
db_connection, table_name="dup_lgx", columns=("allele", "lgx_group")
375375
)
376376
g_group = load_dict(db_connection, table_name="g_group", columns=("allele", "g"))
377+
p_group = load_dict(db_connection, table_name="p_group", columns=("allele", "p"))
377378
lgx_group = load_dict(
378379
db_connection, table_name="lgx_group", columns=("allele", "lgx")
379380
)
380381
exon_group = load_dict(
381382
db_connection, table_name="exon_group", columns=("allele", "exon")
382383
)
383384
p_not_g = load_dict(db_connection, table_name="p_not_g", columns=("allele", "lgx"))
384-
return (
385-
ARSMapping(
386-
dup_g=dup_g,
387-
dup_lgx=dup_lgx,
388-
g_group=g_group,
389-
lgx_group=lgx_group,
390-
exon_group=exon_group,
391-
p_not_g=p_not_g,
392-
),
393-
None,
385+
return ARSMapping(
386+
dup_g=dup_g,
387+
dup_lgx=dup_lgx,
388+
g_group=g_group,
389+
p_group=p_group,
390+
lgx_group=lgx_group,
391+
exon_group=exon_group,
392+
p_not_g=p_not_g,
394393
)
395394

396395

397-
def save_ars_mappings(
398-
db_connection, dup_g, dup_lgx, exon_group, g_group, lgx_group, p_group, p_not_g
399-
):
396+
def save_ars_mappings(db_connection: sqlite3.Connection, ars_mapping: ARSMapping):
400397
save_dict(
401398
db_connection,
402399
table_name="p_not_g",
403-
dictionary=p_not_g,
400+
dictionary=ars_mapping.p_not_g,
404401
columns=("allele", "lgx"),
405402
)
406403
save_dict(
407404
db_connection,
408405
table_name="dup_g",
409-
dictionary=dup_g,
406+
dictionary=ars_mapping.dup_g,
410407
columns=("allele", "g_group"),
411408
)
412409
save_dict(
413410
db_connection,
414411
table_name="dup_lgx",
415-
dictionary=dup_lgx,
412+
dictionary=ars_mapping.dup_lgx,
416413
columns=("allele", "lgx_group"),
417414
)
418415
save_dict(
419-
db_connection, table_name="g_group", dictionary=g_group, columns=("allele", "g")
416+
db_connection,
417+
table_name="g_group",
418+
dictionary=ars_mapping.g_group,
419+
columns=("allele", "g"),
420+
)
421+
save_dict(
422+
db_connection,
423+
table_name="p_group",
424+
dictionary=ars_mapping.p_group,
425+
columns=("allele", "p"),
420426
)
421427
save_dict(
422428
db_connection,
423429
table_name="lgx_group",
424-
dictionary=lgx_group,
430+
dictionary=ars_mapping.lgx_group,
425431
columns=("allele", "lgx"),
426432
)
427433
save_dict(
428434
db_connection,
429435
table_name="exon_group",
430-
dictionary=exon_group,
436+
dictionary=ars_mapping.exon_group,
431437
columns=("allele", "exon"),
432438
)
433-
return (
434-
ARSMapping(
435-
dup_g=dup_g,
436-
dup_lgx=dup_lgx,
437-
g_group=g_group,
438-
lgx_group=lgx_group,
439-
exon_group=exon_group,
440-
p_not_g=p_not_g,
441-
),
442-
p_group,
443-
)
444439

445440

446441
def save_code_mappings(

pyard/exceptions.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@ def __str__(self) -> str:
2525

2626
class InvalidTypingError(PyArdError):
2727
def __init__(self, message: str, cause=None) -> None:
28+
super().__init__(message)
2829
self.cause = cause
2930

3031
def __str__(self) -> str:

pyard/mappings.py

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,20 @@
1+
from collections import namedtuple
2+
3+
ars_mapping_tables = [
4+
"dup_g",
5+
"dup_lgx",
6+
"g_group",
7+
"p_group",
8+
"lgx_group",
9+
"exon_group",
10+
"p_not_g",
11+
]
12+
code_mapping_tables = [
13+
"alleles",
14+
"exp_alleles",
15+
"xx_codes",
16+
"who_alleles",
17+
"who_group",
18+
]
19+
20+
ARSMapping = namedtuple("ARSMapping", ars_mapping_tables)

pyard/misc.py

Lines changed: 2 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,9 @@
11
# List of expression characters
22
import pathlib
3-
import re
43
import tempfile
5-
from typing import List, Literal
4+
from typing import List
65

7-
HLA_regex = re.compile("^HLA-")
8-
9-
VALID_REDUCTION_TYPES = ["G", "lg", "lgx", "W", "exon", "U2"]
10-
expression_chars = ["N", "Q", "L", "S"]
11-
# List of P and G characters
12-
PandG_chars = ["P", "G"]
13-
14-
DEFAULT_CACHE_SIZE = 1_000
6+
from pyard.constants import VALID_REDUCTION_TYPES, expression_chars, PandG_chars
157

168

179
def get_n_field_allele(allele: str, n: int, preserve_expression=False) -> str:

0 commit comments

Comments
 (0)