Skip to content

Commit c986655

Browse files
committed
[terminology] refactoring of cache directory creation
1 parent 2c9b177 commit c986655

File tree

1 file changed

+27
-14
lines changed

1 file changed

+27
-14
lines changed

odml/terminology.py

Lines changed: 27 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -9,15 +9,33 @@
99
import odml.tools.xmlparser
1010
from hashlib import md5
1111
py3 = True
12-
1312
try:
1413
from urllib.request import urlopen
1514
except ImportError:
1615
from urllib import urlopen
17-
1816
import threading
1917

2018
# Age threshold for cached files.
# NOTE(review): presumably the maximum age before cache_load re-downloads a
# file -- the comparison that uses it is not fully visible here; confirm.
CACHE_AGE = datetime.timedelta(days=14)
# Directory inside the system temp directory that holds the cached files.
CACHE_DIR = os.path.join(tempfile.gettempdir(), "odml.cache")
# Text file inside CACHE_DIR with one "<cache file name>; <url>" line per
# downloaded file.
FILE_MAP_FILE = os.path.join(CACHE_DIR, "odml_filemap.csv")

# Create the cache directory at import time. Attempt the creation directly
# instead of checking first: os.makedirs raises OSError when the path already
# exists (including when another process created it concurrently), so the
# error is only propagated if the path is still missing afterwards.
try:
    os.makedirs(CACHE_DIR)
except OSError:
    if not os.path.exists(CACHE_DIR):
        raise
27+
28+
29+
def open_file_map(file_map_path=None):
    """
    Read the cache file map and return it as a dictionary.

    Every line of the map file is expected to look like
    ``<cache file name>; <url>``, which is the format cache_load appends.
    Blank lines and lines without a ';' separator are skipped.

    :param file_map_path: path of the map file to read; when None the
                          module-level FILE_MAP_FILE is used.
    :returns: dict mapping each cache file name to its url; an empty dict
              if the map file does not exist.
    """
    if file_map_path is None:
        file_map_path = FILE_MAP_FILE

    file_map = {}
    if not os.path.exists(file_map_path):
        return file_map

    with open(file_map_path, 'r') as f:
        for line in f:
            # Split on the first ';' only: the url part may legitimately
            # contain further ';' characters and must not be truncated.
            name, separator, url = line.partition(';')
            if not separator:
                # Blank or malformed line -- nothing to record.
                continue
            file_map[name.strip()] = url.strip()
    return file_map
2139

2240

2341
def cache_load(url):
@@ -26,14 +44,7 @@ def cache_load(url):
2644
subsequent requests for this url will use the cached version
2745
"""
2846
filename = md5(url.encode()).hexdigest() + '__' + os.path.basename(url)
29-
cache_dir = os.path.join(tempfile.gettempdir(), "odml.cache")
30-
cache_file = os.path.join(cache_dir, filename)
31-
if not os.path.exists(cache_dir):
32-
try:
33-
os.makedirs(cache_dir)
34-
except OSError: # might happen due to concurrency
35-
if not os.path.exists(cache_dir):
36-
raise
47+
cache_file = os.path.join(CACHE_DIR, filename)
3748

3849
if not os.path.exists(cache_file) \
3950
or datetime.datetime.fromtimestamp(os.path.getmtime(cache_file)) < \
@@ -43,17 +54,16 @@ def cache_load(url):
4354
except Exception as e:
4455
print("Failed loading '%s': %s" % (url, e))
4556
return
46-
4757
fp = open(cache_file, "w")
4858
fp.write(data)
4959
fp.close()
50-
60+
with open(FILE_MAP_FILE, 'a') as fm:
61+
fm.write(filename + "; " + url + "\n")
5162
return open(cache_file)
5263

5364

5465
class Terminologies(dict):
5566
loading = {}
56-
5767
def load(self, url):
5868
"""
5969
load and cache a terminology-url
@@ -63,11 +73,14 @@ def load(self, url):
6373
if url in self:
6474
return self[url]
6575

76+
encode_name = md5(url.encode()).hexdigest() + '__' + os.path.basename(url)
77+
if encode_name in self:
78+
return self[encode_name]
79+
6680
if url in self.loading:
6781
self.loading[url].join()
6882
self.loading.pop(url, None)
6983
return self.load(url)
70-
7184
return self._load(url)
7285

7386
def _load(self, url):

0 commit comments

Comments
 (0)