From adbbabc10990b139f88496e77b241029cbc4c6d1 Mon Sep 17 00:00:00 2001 From: rsajulga-nmdp Date: Tue, 22 Feb 2022 14:32:52 -0600 Subject: [PATCH 1/4] add IMGT pull for TCE; update package specifications --- dpb1/tce.py | 21 ++++++++++++++++++++- requirements.txt | 3 ++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/dpb1/tce.py b/dpb1/tce.py index 3bdab39..610e24c 100644 --- a/dpb1/tce.py +++ b/dpb1/tce.py @@ -19,6 +19,7 @@ # import requests import json +import pandas as pd class TCE_map(object): @@ -26,11 +27,29 @@ def __init__(self, path='data/tce_assignments.txt', ard=None): self.path = path self.tce_assignments = self._get_tce_assignments() + print(self.tce_assignments) self.ard = ard def _get_tce_assignments(self): + """ + Obtains TCE assignments from the local file, updated with a pull from a hard-coded IMGT URL. + :return: Dictionary of alleles to TCE groups (3, 2, 1, 0). + :rtype: Dict[str, str] + """ with open(self.path, 'r') as f: - return json.loads(f.readline()) + tce_map = json.loads(f.readline()) + + allele_header = 'Allele' + tce_header = 'V2_Assignment' + url = 'https://raw.githubusercontent.com/ANHIG/IMGTHLA/Latest/tce/dpb_tce.csv' + tce_df = pd.read_csv(url, comment='#') + tce_df.replace({'\$' : '', 'a' : ''}, regex=True, inplace=True) + tce_df = tce_df[[allele_header, tce_header]] + tce_df = tce_df[~tce_df[tce_header].isnull()] + tce_df.set_index(allele_header, inplace=True) + tce_map_updated = tce_df.to_dict()[tce_header] + tce_map.update(tce_map_updated) + return tce_map def assign_tce(self, dpb1_allele): if dpb1_allele not in self.tce_assignments: diff --git a/requirements.txt b/requirements.txt index f7e02ea..759d5c6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,10 +5,11 @@ flask-restplus==0.13.0 behave==1.2.6 PyHamcrest==1.9.0 allure-behave==2.8.5 +itsdangerous==2.0.1 matplotlib==3.1.3 mpmath==1.1.0 numpy==1.18.1 -pandas==1.0.1 +pandas>=1.1.4 requests==2.22.0 seaborn==0.10.0 toolz==0.11.1 From d57940707d0cd1794f5223a47eb1fab02cc32756 Mon Sep 17 
00:00:00 2001 From: rsajulga-nmdp Date: Fri, 25 Feb 2022 13:12:20 -0600 Subject: [PATCH 2/4] remove print statement --- dpb1/tce.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dpb1/tce.py b/dpb1/tce.py index 610e24c..eb2da09 100644 --- a/dpb1/tce.py +++ b/dpb1/tce.py @@ -27,7 +27,6 @@ def __init__(self, path='data/tce_assignments.txt', ard=None): self.path = path self.tce_assignments = self._get_tce_assignments() - print(self.tce_assignments) self.ard = ard def _get_tce_assignments(self): From 1dd67a4558afb72179c50b21938044636c6c5d04 Mon Sep 17 00:00:00 2001 From: rsajulga-nmdp Date: Mon, 28 Feb 2022 10:49:48 -0600 Subject: [PATCH 3/4] add new Genotype parameters --- dpb1/genotype.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dpb1/genotype.py b/dpb1/genotype.py index 4f670f9..21d46d4 100644 --- a/dpb1/genotype.py +++ b/dpb1/genotype.py @@ -31,7 +31,9 @@ class Genotype(object): :param name: A list of two Haplotype objects or a genotype name (str) """ - def __init__(self, haplotypes): + def __init__(self, haplotypes, index : int = None, prob : float = None): + self.index = index + self.prob = prob self.SLUGs = None if isinstance(haplotypes, str): self.name = haplotypes From 13901435181c929d53a5a618301af79df4fbc9a5 Mon Sep 17 00:00:00 2001 From: rsajulga-nmdp Date: Mon, 28 Feb 2022 10:51:05 -0600 Subject: [PATCH 4/4] fix BDD tests --- .../algorithm/call_rest_service.feature | 25 ++++++++++++++----- tests/steps/dpb1_perm_freq_generation.py | 2 +- tests/steps/match_tce_groups.py | 2 +- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/tests/features/algorithm/call_rest_service.feature b/tests/features/algorithm/call_rest_service.feature index 816f87a..93f9662 100644 --- a/tests/features/algorithm/call_rest_service.feature +++ b/tests/features/algorithm/call_rest_service.feature @@ -778,8 +778,15 @@ Feature: Call REST Service "dpb1_genotypes": [ { "genotype": "DPB1*04:01+DPB1*04:01", - "tce_groups": "3+3", - "probability": 1 + 
"tce_groups": "3+3" + }, + { + "genotype": "DPB1*04:01+DPB1*677:01", + "tce_groups": "3+3" + }, + { + "genotype": "DPB1*677:01+DPB1*677:01", + "tce_groups": "3+3" } ] } @@ -813,13 +820,19 @@ Feature: Call REST Service "dpb1_genotypes": [ { "genotype": "DPB1*03:01+DPB1*04:01", - "tce_groups": "2+3", - "probability": 0.5035143163576772 + "tce_groups": "2+3" + }, + { + "genotype": "DPB1*03:01+DPB1*677:01", + "tce_groups": "2+3" }, { "genotype": "DPB1*04:01+DPB1*14:01", - "tce_groups": "2+3", - "probability": 0.4964856836423228 + "tce_groups": "2+3" + }, + { + "genotype": "DPB1*14:01+DPB1*677:01", + "tce_groups": "2+3" } ] } diff --git a/tests/steps/dpb1_perm_freq_generation.py b/tests/steps/dpb1_perm_freq_generation.py index 680b1cf..c425e3f 100644 --- a/tests/steps/dpb1_perm_freq_generation.py +++ b/tests/steps/dpb1_perm_freq_generation.py @@ -1,6 +1,6 @@ from behave import * from hamcrest import assert_that, is_ -from validation.dpb1_validator.predicted import Predicted, PredictedPair +# from validation.dpb1_validator.predicted import Predicted, PredictedPair import pandas as pd from sigfig import round import json diff --git a/tests/steps/match_tce_groups.py b/tests/steps/match_tce_groups.py index e0da4b2..6daf5e0 100644 --- a/tests/steps/match_tce_groups.py +++ b/tests/steps/match_tce_groups.py @@ -3,7 +3,7 @@ from dpb1.match import Matches, MatchGrade from dpb1.dpb1 import DPB1_SLUG from dpb1.tce import TCE_SLUG -from validation.dpb1_validator.observed import ObservedPair +# from validation.dpb1_validator.observed import ObservedPair @given('these TCE group pairs and expected match categories') def step_impl(context):