9
9
import odml .tools .xmlparser
10
10
from hashlib import md5
11
11
py3 = True
12
-
13
12
try :
14
13
from urllib .request import urlopen
15
14
except ImportError :
16
15
from urllib import urlopen
17
-
18
16
import threading
19
17
20
18
CACHE_AGE = datetime .timedelta (days = 14 )
19
# On-disk cache directory inside the system temp dir, plus the file that
# records which cached file name belongs to which url.
CACHE_DIR = os.path.join(tempfile.gettempdir(), "odml.cache")
FILE_MAP_FILE = os.path.join(CACHE_DIR, "odml_filemap.csv")

try:
    if not os.path.exists(CACHE_DIR):
        os.makedirs(CACHE_DIR)
except OSError:
    # Creation may fail because the directory appeared in the meantime
    # (concurrency); only propagate the error if it is still missing.
    if not os.path.exists(CACHE_DIR):
        raise
27
+
28
+
29
def open_file_map():
    """
    Read FILE_MAP_FILE and return its content as a dictionary.

    Every line is expected to have the form ``<key>;<value>``. The line is
    split at the first ';' only, so a ';' occurring inside the value is
    preserved. Surrounding whitespace is stripped from both parts. Lines
    without a ';' (e.g. blank lines) are skipped instead of raising an
    IndexError.

    :returns: dict mapping the first field of each line to the second
              field; an empty dict if FILE_MAP_FILE does not exist.
    """
    file_map = {}
    if not os.path.exists(FILE_MAP_FILE):
        return file_map

    with open(FILE_MAP_FILE, 'r') as map_file:
        # Iterate the file lazily; there is no need to load all lines first.
        for line in map_file:
            key, separator, value = line.partition(';')
            if not separator:
                # Blank or malformed line: nothing to map.
                continue
            file_map[key.strip()] = value.strip()
    return file_map
21
39
22
40
23
41
def cache_load (url ):
@@ -26,14 +44,7 @@ def cache_load(url):
26
44
subsequent requests for this url will use the cached version
27
45
"""
28
46
filename = md5 (url .encode ()).hexdigest () + '__' + os .path .basename (url )
29
- cache_dir = os .path .join (tempfile .gettempdir (), "odml.cache" )
30
- cache_file = os .path .join (cache_dir , filename )
31
- if not os .path .exists (cache_dir ):
32
- try :
33
- os .makedirs (cache_dir )
34
- except OSError : # might happen due to concurrency
35
- if not os .path .exists (cache_dir ):
36
- raise
47
+ cache_file = os .path .join (CACHE_DIR , filename )
37
48
38
49
if not os .path .exists (cache_file ) \
39
50
or datetime .datetime .fromtimestamp (os .path .getmtime (cache_file )) < \
@@ -43,17 +54,16 @@ def cache_load(url):
43
54
except Exception as e :
44
55
print ("Failed loading '%s': %s" % (url , e ))
45
56
return
46
-
47
57
fp = open (cache_file , "w" )
48
58
fp .write (data )
49
59
fp .close ()
50
-
60
+ with open (FILE_MAP_FILE , 'a' ) as fm :
61
+ fm .write (filename + "; " + url + "\n " )
51
62
return open (cache_file )
52
63
53
64
54
65
class Terminologies (dict ):
55
66
loading = {}
56
-
57
67
def load (self , url ):
58
68
"""
59
69
load and cache a terminology-url
@@ -63,11 +73,14 @@ def load(self, url):
63
73
if url in self :
64
74
return self [url ]
65
75
76
+ encode_name = md5 (url .encode ()).hexdigest () + '__' + os .path .basename (url )
77
+ if encode_name in self :
78
+ return self [encode_name ]
79
+
66
80
if url in self .loading :
67
81
self .loading [url ].join ()
68
82
self .loading .pop (url , None )
69
83
return self .load (url )
70
-
71
84
return self ._load (url )
72
85
73
86
def _load (self , url ):
0 commit comments