Merge pull request #42 from pbashyal-nmdp/master

mmaiers-nmdp · web-flow · commit 4bb7c2411ed3 · 2020-07-22T09:12:26.000-05:00
Use the caller-supplied data_dir for MAC files instead of the package directory.
diff --git a/pyard/pyard.py b/pyard/pyard.py
@@ -128,15 +128,14 @@ def __init__(self, dbversion: str='Latest',
 
         imgt_hla_url = 'https://raw.githubusercontent.com/ANHIG/IMGTHLA/'
         ars_url = imgt_hla_url + dbversion + '/wmda/hla_nom_g.txt'
+        allele_url = imgt_hla_url + dbversion + "/Allelelist.txt"
+
         ars_file = data_dir + '/hla_nom_g.' + str(dbversion) + ".txt"
         allele_file = data_dir + '/AlleleList.' + str(dbversion) + ".txt"
         mac_file = data_dir + "/mac.txt"
         mac_pickle = data_dir + "/mac.pickle"
+        # dna_relshp.csv ships with the package; load it from the module directory, not data_dir
         broad_file = os.path.dirname(__file__) + "/dna_relshp.csv"
-        #print("mac_file:", mac_file)
-
-        allele_url = "https://raw.githubusercontent.com/ANHIG/IMGTHLA/" \
-                     + dbversion + "/Allelelist.txt"
 
         # Downloading ARS file
         if not os.path.isfile(ars_file):
@@ -155,7 +154,7 @@ def __init__(self, dbversion: str='Latest',
             if not os.path.isfile(mac_pickle):
                 if verbose:
                     logging.info("Downloading MAC file")
-                self.mac = all_macs(mac_file)
+                self.mac = all_macs(mac_file, data_dir=data_dir)
 
                 # Writing dict to pickle file
                 with open(mac_pickle, 'wb') as handle:
diff --git a/pyard/util.py b/pyard/util.py
@@ -21,37 +21,19 @@
 #    > http://www.fsf.org/licensing/licenses/lgpl.html
 #    > http://www.opensource.org/licenses/lgpl-license.php
 #
-import os
-import string
-import random as r
-from datetime import datetime, date
-from six import integer_types, iteritems
-import pandas as pd
 import copy
-import http.client
-import pickle
+import re
 import urllib.request
 import zipfile
-import re
+from datetime import datetime, date
+
+import pandas as pd
+from six import integer_types, iteritems
 
 
-# def all_macs(csv_file, url='hml.nmdp.org'):
-#     # conn = http.client.HTTPSConnection(url, 443)
-#     # conn.putrequest('GET', '/mac/api/codes')
-#     # conn.endheaders()
-#     # response = conn.getresponse().read().decode('utf8').splitlines()
-#     data = [l.split("\t")[1:3] for l in response]
-#     urllib.request.urlretrieve(url, 'numeric.v3.zip')
-#     df = pd.DataFrame(data, columns=['Code','Alleles'])
-#     df.to_csv(csv_file, header=True, index=False)
-#     df['Alleles'] = df['Alleles'].apply(lambda x: x.split("/"))
-#     mac_dict = df.set_index("Code").to_dict('index')
-#     return mac_dict
-
-def all_macs(csv_file, url='https://hml.nmdp.org/mac/files/numer.v3.zip'):
+def all_macs(csv_file, data_dir, url='https://hml.nmdp.org/mac/files/numer.v3.zip'):
     urllib.request.urlretrieve(url, 'numeric.v3.zip')
     zip_ref = zipfile.ZipFile('numeric.v3.zip', 'r')
-    data_dir = os.path.dirname(__file__)
     zip_ref.extractall(data_dir)
     zip_ref.close()
     data = []
@@ -62,7 +44,7 @@ def all_macs(csv_file, url='https://hml.nmdp.org/mac/files/numer.v3.zip'):
             if re.search("^\D", line) and not re.search("CODE", line) and not re.search("LAST", line):
                 data.append(line.split("\t"))
         f.close()
-    df = pd.DataFrame(data, columns=['Code','Alleles'])
+    df = pd.DataFrame(data, columns=['Code', 'Alleles'])
     df.to_csv(csv_file, header=True, index=False)
     df['Alleles'] = df['Alleles'].apply(lambda x: x.split("/"))
     mac_dict = df.set_index("Code").to_dict('index')
diff --git a/setup.py b/setup.py
@@ -42,7 +42,7 @@
 
 setup(
     name='py-ard',
-    version='0.0.16',
+    version='0.0.17',
     description="ARD reduction for HLA with python",
     long_description=readme + '\n\n' + history,
     author="CIBMTR",