33__license__ = "MIT"
44
55import re
6+ from collections import defaultdict
7+ from contextlib import contextmanager
8+ from copy import deepcopy
9+ from functools import cache
610
11+ from pydicom import FileDataset
712from pydicom .dataelem import DataElement
813from pydicom .dataset import Dataset , FileMetaDataset , RawDataElement
914from pydicom .sequence import Sequence
@@ -47,12 +52,10 @@ def stripped_tag(self):
4752
4853 # Contains
4954
50- def name_contains (self , expression , whole_string = False ):
55+ def name_contains (self , expression ):
5156 """
52- Determine if a name contains a pattern or expression.
53- Use whole_string to match the entire string exactly (True),
54- or partially (False).
55- Use re to search a field for a regular expression, meaning
57+ Determine if a name contains an expression.
58+ Use expression to search a field, meaning
5659 the name, the keyword (nested) or the string tag.
5760 name.lower: includes nested keywords (e.g., Sequence_Child)
5861 self.element.name: is the human friendly name "Sequence Child"
@@ -62,7 +65,7 @@ def name_contains(self, expression, whole_string=False):
6265 - Patient's Name (tag name)
6366 - patient's name (lowercase tag name)
6467 - PatientName (tag keyword)
65- - PatientN (tag keyword partial match, with whole_string=False )
68+ - PatientN (tag keyword partial match)
6669 - (0010,0010) (parentheses-enclosed, comma-separated group, element)
6770 - 00100010 (stripped group, element)
6871
@@ -77,14 +80,17 @@ def name_contains(self, expression, whole_string=False):
7780 - PRIVATE_CREATOR: Private creator string in double quotes
7881 - ELEMENT_OFFSET: 2-digit hexadecimal element number (last 8 bits of full element)
7982 """
80- regexp_expression = f"^{ expression } $" if whole_string else expression
83+ if isinstance (expression , str ):
84+ expression = re .compile (expression , re .IGNORECASE )
85+
8186 if (
82- re .search (regexp_expression , self .name .lower ())
83- or f"({ self .element .tag .group :04X} ,{ self .element .tag .element :04X} )" .lower ()
84- == expression .lower ()
85- or re .search (regexp_expression , self .stripped_tag )
86- or re .search (regexp_expression , self .element .name )
87- or re .search (regexp_expression , self .element .keyword )
87+ expression .search (self .name .lower ())
88+ or expression .search (
89+ f"({ self .element .tag .group :04X} ,{ self .element .tag .element :04X} )" .lower ()
90+ )
91+ or expression .search (self .stripped_tag )
92+ or expression .search (self .element .name )
93+ or expression .search (self .element .keyword )
8894 ):
8995 return True
9096
@@ -99,11 +105,10 @@ def name_contains(self, expression, whole_string=False):
99105 # The GROUP is the 4-digit hex group number (e.g., 0033)
100106 # The PRIVATE_CREATOR is the private creator string in quotes
101107 # The ELEMENT_OFFSET is the 2-digit hex element number (masked to last 8 bits)
102- stripped_private_tag = f'{ self .element .tag .group :04X} ,"{ self .element .private_creator } ",{ (self .element .tag .element & 0xFF ):02X} '
108+ stripped_private_tag = f'{ self .element .tag .group :04X} ,"{ self .element .private_creator } ",{ (self .element .tag .element & 0xFF ):02X} '
103109 private_tag = "(" + stripped_private_tag + ")"
104- if (
105- re .search (regexp_expression , stripped_private_tag , re .IGNORECASE )
106- or private_tag .lower () == expression .lower ()
110+ if re .search (expression , stripped_private_tag ) or re .search (
111+ expression , private_tag
107112 ):
108113 return True
109114 return False
@@ -225,29 +230,25 @@ def expand_field_expression(field, dicom, contenders=None):
225230 allfields: include all fields
226231 exceptfields: filter to all fields except those listed ( | separated)
227232
228- Returns: a list of DicomField objects
233+ Returns: a dictionary of DicomField objects
229234 """
230235 # Expanders that don't have a : must be checked for
231236 expanders = ["all" ]
232237
233238 # if no contenders provided, use top level of dicom headers
234239 if contenders is None :
235- contenders = get_fields (dicom )
240+ contenders = get_fields_with_lookup (dicom )
236241
237242 # Case 1: field is an expander without an argument (e.g., no :)
238243 if field .lower () in expanders :
239244 if field .lower () == "all" :
240- fields = contenders
245+ fields = deepcopy ( contenders . fields )
241246 return fields
242247
243248 # Case 2: The field is a specific field OR an expander with argument (A:B)
244249 fields = field .split (":" , 1 )
245250 if len (fields ) == 1 :
246- return {
247- uid : field
248- for uid , field in contenders .items ()
249- if field .name_contains (fields [0 ], whole_string = True )
250- }
251+ return {field .uid : field for field in contenders .get_exact_matches (fields [0 ])}
251252
252253 # if we get down here, we have an expander and expression
253254 expander , expression = fields
@@ -260,32 +261,205 @@ def expand_field_expression(field, dicom, contenders=None):
260261 elif expander .lower () == "startswith" :
261262 expression = "^(%s)" % expression
262263
264+ expression_re = None
263265 # Loop through fields, all are strings STOPPED HERE NEED TO ADDRESS EMPTY NAME
264266 for uid , field in contenders .items ():
265- # Apply expander to string for name OR to tag string
266- if expander .lower () in ["endswith" , "startswith" , "contains" ]:
267- if field .name_contains (expression ):
268- fields [uid ] = field
267+ if isinstance (field , str ) and string_matches_expander (
268+ expander , expression , field
269+ ):
270+ fields [uid ] = field
271+ elif isinstance (field , DicomField ) and field_matches_expander (
272+ expander , expression , expression_re , field
273+ ):
274+ fields [uid ] = field
269275
270- elif expander .lower () == "except" :
271- if not field .name_contains (expression ):
272- fields [uid ] = field
276+ return fields
273277
274- elif expander .lower () == "select" :
275- if field .select_matches (expression ):
276- fields [uid ] = field
277278
278- return fields
279+ def string_matches_expander (expander , expression_string , string ):
280+ """
281+ Check if a string matches an expression for a given expander operator.
282+ """
283+ if expander .lower () in ["endswith" , "startswith" , "contains" ]:
284+ return expression_string in string
285+
286+ elif expander .lower () == "except" :
287+ return expression_string not in string
288+
289+ # Note: "select" expanders are not applicable to string values, as
290+ # they do not have any attributes.
291+ return False
292+
293+
294+ def field_matches_expander (expander , expression_string , expression_re , field ):
295+ """
296+ Check if a field matches an expression for a given expander operator.
297+
298+ The `expression_re` argument is an optional arg that can be used to cache
299+ the compiled regex for re-use in later invocations.
300+ """
301+ # Apply expander to string for name OR to tag string
302+ if expander .lower () == "select" :
303+ return field .select_matches (expression_string )
304+
305+ if expression_re is None :
306+ expression_re = re .compile (expression_string , re .IGNORECASE )
307+
308+ if expander .lower () in ["endswith" , "startswith" , "contains" ]:
309+ return field .name_contains (expression_re )
279310
311+ elif expander .lower () == "except" :
312+ return not field .name_contains (expression_re )
280313
281- def get_fields (dicom , skip = None , expand_sequences = True , seen = None ):
282- """Expand all dicom fields into a list.
314+ return False
315+
316+
317+ class FieldsWithLookups :
318+ """
319+ This class is a wrapper around a dictionary of DicomField objects keyed by uid,
320+ with some supplemental lookup tables to enable rapid field lookup.
321+ """
322+
323+ def __init__ (self , fields ):
324+ self .fields = fields
325+
326+ self .lookup_tables = {
327+ "name" : defaultdict (list ),
328+ "tag" : defaultdict (list ),
329+ "stripped_tag" : defaultdict (list ),
330+ "element_name" : defaultdict (list ),
331+ "element_keyword" : defaultdict (list ),
332+ }
333+ for uid , field in fields .items ():
334+ self ._add_field_to_lookup (field )
335+
336+ def get_exact_matches (self , field ):
337+ """
338+ Get exact case-insensitive matches for a field name or tag.
339+
340+ Returns a list of DicomField objects.
341+ """
342+ if isinstance (field , str ):
343+ field = field .lower ()
344+ exact_match_contenders = (
345+ self .lookup_tables ["name" ][field ]
346+ + self .lookup_tables ["tag" ][field ]
347+ + self .lookup_tables ["stripped_tag" ][field ]
348+ + self .lookup_tables ["element_name" ][field ]
349+ + self .lookup_tables ["element_keyword" ][field ]
350+ )
351+ return exact_match_contenders
352+
353+ def __getitem__ (self , key ):
354+ return self .fields [key ]
355+
356+ def __setitem__ (self , key , value ):
357+ self .fields [key ] = value
358+
359+ def __iter__ (self ):
360+ return iter (self .fields )
361+
362+ def items (self ):
363+ return self .fields .items ()
364+
365+ def values (self ):
366+ return self .fields .values ()
367+
368+ def add (self , uid , field ):
369+ self .fields [uid ] = field
370+ self ._add_field_to_lookup (field )
371+
372+ def _get_field_lookup_keys (self , field ):
373+ if not isinstance (field , DicomField ):
374+ case_insensitive_field = field .lower ()
375+ self .lookup_tables ["name" ][case_insensitive_field ].append (
376+ case_insensitive_field
377+ )
378+ return {"name" : [case_insensitive_field ]}
379+ lookup_keys = {}
380+ if field .name :
381+ lookup_keys ["name" ] = [field .name .lower ()]
382+ if field .tag :
383+ lookup_keys ["tag" ] = [field .tag .lower ()]
384+ if field .stripped_tag :
385+ lookup_keys ["stripped_tag" ] = [field .stripped_tag .lower ()]
386+ if field .element .name :
387+ lookup_keys ["element_name" ] = [field .element .name .lower ()]
388+ if field .element .keyword :
389+ lookup_keys ["element_keyword" ] = [field .element .keyword .lower ()]
390+ if field .element .is_private :
391+ lookup_keys ["name" ] = lookup_keys .get ("name" , []) + [
392+ f'({ field .element .tag .group :04X} ,"{ field .element .private_creator } ",{ (field .element .tag .element & 0x00FF ):02X} )' .lower (),
393+ f'{ field .element .tag .group :04X} ,"{ field .element .private_creator } ",{ (field .element .tag .element & 0x00FF ):02X} ' .lower (),
394+ ]
395+ return lookup_keys
396+
397+ def _add_field_to_lookup (self , field ):
398+ for table_name , lookup_keys in self ._get_field_lookup_keys (field ).items ():
399+ for key in lookup_keys :
400+ self .lookup_tables [table_name ][key ].append (field )
401+
402+ def remove (self , uid ):
403+ if uid not in self .fields :
404+ return
405+ field = self .fields [uid ]
406+ for table_name , lookup_keys in self ._get_field_lookup_keys (field ).items ():
407+ for key in lookup_keys :
408+ if field not in self .lookup_tables [table_name ][key ]:
409+ continue
410+ self .lookup_tables [table_name ][key ].remove (field )
411+ if not self .lookup_tables [table_name ][key ]:
412+ del self .lookup_tables [table_name ][key ]
413+ del self .fields [uid ]
414+
415+
416+ @contextmanager
417+ def override_attr (obj , attr , value ):
418+ """
419+ Temporarily override an attribute of an object.
420+ """
421+ attribute_undefined = not hasattr (obj , attr )
422+ try :
423+ original_value = getattr (obj , attr , None )
424+ setattr (obj , attr , value )
425+ yield
426+ finally :
427+ if attribute_undefined :
428+ delattr (obj , attr )
429+ else :
430+ setattr (obj , attr , original_value )
431+
432+
433+ def get_fields_with_lookup (dicom , skip = None , expand_sequences = True , seen = None ):
434+ """Expand all dicom fields into a list, along with lookup tables keyed on
435+ different field properties.
283436
284437 Each entry is a DicomField. If we find a sequence, we unwrap it and
285438 represent the location with the name (e.g., Sequence__Child)
286439 """
287- skip = skip or []
288- seen = seen or []
440+ # NOTE: this hashing function is required to enable caching on
441+ # `get_fields_inner`. While it is not ideal to override the hashing
442+ # behavior of the PyDicom FileDataset class, it appears to be the
443+ # only way to enable the use of caching without incurring significant
444+ # performance overhead. Note that adding a proxy class around this
445+ # decreases performance substantially (50% slowdown measured).
446+ with override_attr (FileDataset , "__hash__" , lambda self : id (self )):
447+ fields , new_seen , new_skip = _get_fields_inner (
448+ dicom ,
449+ skip = tuple (skip ) if skip else None ,
450+ expand_sequences = expand_sequences ,
451+ seen = tuple (seen ) if seen else None ,
452+ )
453+ skip = new_skip
454+ seen = new_seen
455+
456+ return FieldsWithLookups (fields )
457+
458+
459+ @cache
460+ def _get_fields_inner (dicom , skip = None , expand_sequences = True , seen = None ):
461+ skip = list (skip ) if skip else []
462+ seen = list (seen ) if seen else []
289463 fields = {} # indexed by nested tag
290464
291465 if not isinstance (skip , list ):
@@ -357,4 +531,4 @@ def add_element(element, name, uid, is_filemeta):
357531 "Unrecognized type %s in extract sequences, skipping." % type (item )
358532 )
359533
360- return fields
534+ return fields , seen , skip
0 commit comments