9
9
import odml .tools .xmlparser
10
10
from hashlib import md5
11
11
py3 = True
12
-
13
12
try :
14
13
from urllib .request import urlopen
15
14
except ImportError :
16
15
from urllib import urlopen
17
-
18
16
import threading
19
17
20
# Cached terminology files are considered stale after this period and re-fetched.
CACHE_AGE = datetime.timedelta(days=14)
# All cached terminology files live in one shared directory under the system temp dir.
CACHE_DIR = os.path.join(tempfile.gettempdir(), "odml.cache")
# CSV-ish map ("filename; url" per line) from cached file names back to source URLs.
FILE_MAP_FILE = os.path.join(CACHE_DIR, "odml_filemap.csv")

if not os.path.exists(CACHE_DIR):
    try:
        os.makedirs(CACHE_DIR)
    except OSError:  # might happen due to concurrency
        if not os.path.exists(CACHE_DIR):
            raise
28
+
29
+
30
def open_file_map():
    """
    Opens the file_map file stored in the cache that maps the file names to the
    urls of the respective terminologies.

    @return dict mapping cached file name -> source url; empty if no file map
            has been written yet.
    """
    file_map = {}
    if not os.path.exists(FILE_MAP_FILE):
        return file_map
    with open(FILE_MAP_FILE, 'r') as f:
        for line in f:
            parts = line.strip().split(';')
            if len(parts) < 2:
                # skip blank or malformed lines instead of raising IndexError
                continue
            file_map[parts[0].strip()] = parts[1].strip()
    return file_map
21
44
22
45
23
46
def cache_load (url ):
24
47
"""
25
- load the url and store it in a temporary cache directory
48
+ Load the url and store it in a temporary cache directory
26
49
subsequent requests for this url will use the cached version
27
50
"""
28
- filename = md5 (url .encode ()).hexdigest () + os .path .basename (url )
29
- cache_dir = os .path .join (tempfile .gettempdir (), "odml.cache" )
30
- if not os .path .exists (cache_dir ):
31
- try :
32
- os .makedirs (cache_dir )
33
- except OSError : # might happen due to concurrency
34
- if not os .path .exists (cache_dir ):
35
- raise
36
- cache_file = os .path .join (cache_dir , filename )
51
+ filename = md5 (url .encode ()).hexdigest () + '__' + os .path .basename (url )
52
+ cache_file = os .path .join (CACHE_DIR , filename )
53
+
37
54
if not os .path .exists (cache_file ) \
38
55
or datetime .datetime .fromtimestamp (os .path .getmtime (cache_file )) < \
39
56
datetime .datetime .now () - CACHE_AGE :
@@ -42,18 +59,69 @@ def cache_load(url):
42
59
except Exception as e :
43
60
print ("Failed loading '%s': %s" % (url , e ))
44
61
return
45
-
46
62
fp = open (cache_file , "w" )
47
63
fp .write (data )
48
64
fp .close ()
49
-
65
+ with open (FILE_MAP_FILE , 'a' ) as fm :
66
+ fm .write (filename + "; " + url + "\n " )
50
67
return open (cache_file )
51
68
52
69
70
def cached_files():
    """
    Returns a list of all locally cached files (only .xml entries that are
    regular files inside the cache directory).
    """
    result = []
    for entry in os.listdir(CACHE_DIR):
        full_path = os.path.join(CACHE_DIR, entry)
        if entry.endswith(".xml") and os.path.isfile(full_path):
            result.append(entry)
    return result
77
+
78
+
79
def show_cache():
    """
    Show all locally cached files. Just for display.
    """
    print("terminology %s \t updated" % (19 * " "))
    print(60 * "-")
    for fname in cached_files():
        cache_file = os.path.join(CACHE_DIR, fname)
        file_timestamp = datetime.datetime.fromtimestamp(os.path.getmtime(cache_file))
        # drop the md5 prefix ("<hash>__<name>") for display
        disp_name = '_'.join(fname.split('__')[1:])
        if len(disp_name) > 30:
            disp_name = disp_name[:16] + "..."
        # pad to a fixed 30-character column
        disp_name = disp_name.ljust(30)
        print(" %s \t %s" % (disp_name, file_timestamp))
95
+
96
+
97
def clear_cache():
    """
    Clears the cache, i.e. deletes all locally stored files. Does not remove
    the cache folder, though.
    """
    for fname in cached_files():
        os.remove(os.path.join(CACHE_DIR, fname))
    # also discard the filename -> url map belonging to the removed files
    if os.path.exists(FILE_MAP_FILE):
        os.remove(FILE_MAP_FILE)
106
+
107
+
108
def from_cache(term):
    """
    Fills the terminology container with the definitions stored in the cache.

    @param term a Terminologies instance to populate.
    """
    assert isinstance(term, Terminologies)
    # NOTE: the original also called cached_files() into an unused variable,
    # doing a pointless directory scan; only the file map is actually needed.
    file_map = open_file_map()
    for url in file_map.values():
        if url not in term:
            term.load(url)
118
+
119
+
53
120
class Terminologies (dict ):
54
121
loading = {}
122
+ types = None
55
123
56
- def load (self , url ):
124
+ def load (self , url = "http://portal.g-node.org/odml/terminologies/v1.0/terminologies.xml" ):
57
125
"""
58
126
load and cache a terminology-url
59
127
@@ -62,11 +130,14 @@ def load(self, url):
62
130
if url in self :
63
131
return self [url ]
64
132
133
+ encode_name = md5 (url .encode ()).hexdigest () + '__' + os .path .basename (url )
134
+ if encode_name in self :
135
+ return self [encode_name ]
136
+
65
137
if url in self .loading :
66
138
self .loading [url ].join ()
67
139
self .loading .pop (url , None )
68
140
return self .load (url )
69
-
70
141
return self ._load (url )
71
142
72
143
def _load (self , url ):
@@ -95,14 +166,129 @@ def deferred_load(self, url):
95
166
self .loading [url ] = threading .Thread (target = self ._load , args = (url ,))
96
167
self .loading [url ].start ()
97
168
169
+ def empty (self ):
170
+ """
171
+ Tells whether there are no terminolgies stored.
172
+ """
173
+ return len (self ) == 0
174
+
175
    def type_list(self):
        """
        Returns a dict of all section types stored in the cache together with
        the terminologies they are defined in: each type maps to a list of
        (terminology key, section path) tuples.
        """
        # populate from the on-disk cache if nothing has been loaded yet
        if self.empty():
            from_cache(self)
        # build the type index only once; subsequent calls reuse it
        # NOTE(review): reconstructed from a diff — presumably the loop is
        # inside this guard (otherwise repeated calls would duplicate entries);
        # confirm against the original file.
        if not self.types:
            self.types = {}
            for k in self.items():
                for s in k[1].itersections():
                    if s.type in self.types:
                        self.types[s.type].append((k[0], s.get_path()))
                    else:
                        self.types[s.type] = [(k[0], s.get_path())]
        return self.types
190
+
191
+ def _compare_repo (self , candidate_repo , candidate_path , pattern , relaxed ):
192
+ parts = pattern .lower ().split ()
193
+ match = True
194
+ repo = candidate_repo .lower ()
195
+ path = candidate_path .lower ()
196
+ for p in parts :
197
+ if p .startswith ("!" ):
198
+ if relaxed :
199
+ match = match or (p [1 :] not in repo .lower () and p [1 :] not in path )
200
+ else :
201
+ match = match and (p [1 :] not in repo and p [1 :] not in path )
202
+ else :
203
+ if relaxed :
204
+ match = match or (p in repo or p in path )
205
+ else :
206
+ match = match and (p in repo or p in path )
207
+ return match
208
+
209
+ def _find_match (self , type_matches , pattern , relaxed = False ):
210
+ if pattern :
211
+ matches = []
212
+ for i , (r , p ) in enumerate (type_matches ):
213
+ if self ._compare_repo (r , p , pattern , relaxed ):
214
+ matches .append (type_matches [i ])
215
+ return matches
216
+ else : # simply return first
217
+ return type_matches
218
+ return []
219
+
220
+ def _get_section_by_type (self , section_type , pattern = None , relaxed = False , find_all = False ):
221
+ if self .empty () or len (self .types ) == 0 :
222
+ self .type_list ()
223
+ matches = []
224
+ if section_type in self .types :
225
+ matches = self ._find_match (self .types [section_type ], pattern , relaxed )
226
+ if len (matches ) > 0 :
227
+ if len (matches ) > 1 and find_all :
228
+ sections = []
229
+ for m in matches :
230
+ sections .append (self [m [0 ]].get_section_by_path (m [1 ]).clone ())
231
+ return sections
232
+ else :
233
+ return self [matches [0 ][0 ]].get_section_by_path (matches [0 ][1 ]).clone ()
234
+ else :
235
+ return None
236
+
237
+
98
238
terminologies = Terminologies ()
99
239
load = terminologies .load
100
240
deferred_load = terminologies .deferred_load
101
241
102
242
243
def get_section_by_type(section_type, pattern=None, relaxed=False, find_all=False):
    """
    Finds a section type in the cached repositories and returns it.

    @param section_type the type of the section; must be a valid full match.
                        Returns the first match.
    @param pattern an optional filter pattern, i.e. a string with
                   characteristics regarding the repository the section should
                   originate from and its path in the file (see below).
    @param relaxed optional, defines whether all criteria must be met or not.
    @param find_all optional, sets whether all possible matches are returned.

    @return Section or list of sections depending on the find_all parameter,
            None, if no match was found.

    Example:
    Suppose we are looking for a section type 'analysis' and it should be from
    the g-node terminologies.
        s = get_section_by_type("analysis", "g-node")
        print(s)
        <Section Analysis[analysis] (0)>
    If we want to exclude the g-node terminologies, simply put an ! in front of
    the pattern:
        s = get_section_by_type("analysis", "!g-node")
    Multiple criteria can be combined
    (e.g. get_section_by_type("setup/daq", "g-node blackrock !cerebus")).
    The relaxed parameter controls whether all criteria have to match.
    """
    # thin module-level convenience wrapper around the shared container
    return terminologies._get_section_by_type(section_type, pattern=pattern,
                                              relaxed=relaxed, find_all=find_all)
271
+
272
def find_definitions(section_type):
    """
    Finds repositories that define the provided section type.

    @param section_type the requested section type

    @return list of tuples containing the repository and the path at which the
            respective section can be found. List may be empty.
    """
    return terminologies.type_list().get(section_type, [])
286
+
103
287
if __name__ == "__main__":
    # Ad-hoc manual demo: fill the shared container from the on-disk cache and
    # drop into an interactive shell for inspection.
    # NOTE(review): IPython is a third-party dependency, imported only here.
    from IPython import embed
    print("Terminologies!")
    from_cache(terminologies)
    # t.load('http://portal.g-node.org/odml/terminologies/v1.0/terminologies.xml')
    # t.load('http://portal.g-node.org/odml/terminologies/v1.0/analysis/power_spectrum.xml')
    # NOTE(review): return value discarded — presumably called only to exercise
    # the lookup; confirm whether it should be printed or assigned.
    find_definitions("analysis")
    embed()
0 commit comments