2121# > http://www.fsf.org/licensing/licenses/lgpl.html
2222# > http://www.opensource.org/licenses/lgpl-license.php
2323#
24- import os
25- import string
26- import random as r
27- from datetime import datetime , date
28- from six import integer_types , iteritems
29- import pandas as pd
3024import copy
31- import http .client
32- import pickle
25+ import re
3326import urllib .request
3427import zipfile
35- import re
28+ from datetime import datetime , date
29+
30+ import pandas as pd
31+ from six import integer_types , iteritems
3632
3733
38- # def all_macs(csv_file, url='hml.nmdp.org'):
39- # # conn = http.client.HTTPSConnection(url, 443)
40- # # conn.putrequest('GET', '/mac/api/codes')
41- # # conn.endheaders()
42- # # response = conn.getresponse().read().decode('utf8').splitlines()
43- # data = [l.split("\t")[1:3] for l in response]
44- # urllib.request.urlretrieve(url, 'numeric.v3.zip')
45- # df = pd.DataFrame(data, columns=['Code','Alleles'])
46- # df.to_csv(csv_file, header=True, index=False)
47- # df['Alleles'] = df['Alleles'].apply(lambda x: x.split("/"))
48- # mac_dict = df.set_index("Code").to_dict('index')
49- # return mac_dict
50-
51- def all_macs (csv_file , url = 'https://hml.nmdp.org/mac/files/numer.v3.zip' ):
34+ def all_macs (csv_file , data_dir , url = 'https://hml.nmdp.org/mac/files/numer.v3.zip' ):
5235 urllib .request .urlretrieve (url , 'numeric.v3.zip' )
5336 zip_ref = zipfile .ZipFile ('numeric.v3.zip' , 'r' )
54- data_dir = os .path .dirname (__file__ )
5537 zip_ref .extractall (data_dir )
5638 zip_ref .close ()
5739 data = []
@@ -62,7 +44,7 @@ def all_macs(csv_file, url='https://hml.nmdp.org/mac/files/numer.v3.zip'):
6244 if re .search ("^\D" , line ) and not re .search ("CODE" , line ) and not re .search ("LAST" , line ):
6345 data .append (line .split ("\t " ))
6446 f .close ()
65- df = pd .DataFrame (data , columns = ['Code' ,'Alleles' ])
47+ df = pd .DataFrame (data , columns = ['Code' , 'Alleles' ])
6648 df .to_csv (csv_file , header = True , index = False )
6749 df ['Alleles' ] = df ['Alleles' ].apply (lambda x : x .split ("/" ))
6850 mac_dict = df .set_index ("Code" ).to_dict ('index' )
0 commit comments