77import pydicom as dicom
88import cv2
99import json
10- from pflog import pflog
10+ # from pflog import pflog
1111from pydicom .pixel_data_handlers import convert_color_space
1212import numpy as np
13- __version__ = '1.2.6'
13+ import re
14+ from PIL import Image
15+ import pytesseract
16+
17+ __version__ = '1.2.7'
1418
1519DISPLAY_TITLE = r"""
1620 _ _ _ __ _ _ _
3135 help = 'comma separated dicom tags with values' )
# Command-line options (post-change state of the diff: '-t' now toggles text
# inspection, '-o' selects the output type, and '--exclude' is gone).
# Option strings must not contain whitespace: argparse derives the attribute
# name from the first long option, so '--textInspect ' would not produce
# `options.textInspect`.
parser.add_argument('-f', '--fileFilter', default='dcm', type=str,
                    help='input file filter glob')
parser.add_argument('-m', '--minImgCount', default=1, type=int,
                    help='A configurable threshold—any series with fewer images is dropped.')
parser.add_argument('-V', '--version', action='version',
                    version=f'%(prog)s {__version__} ')
parser.add_argument('-o', '--outputType', default='dcm', type=str,
                    help='output file type(extension only)')
parser.add_argument('-t', '--textInspect', default=False, action="store_true",
                    help='True means detect text in images, else no .')
parser.add_argument('--pftelDB',
                    dest='pftelDB',
                    default='',
                    type=str,
                    help='optional pftel server DB path')
4551
class TagCondition:
    """One parsed filter condition: a tag, an operator and the value(s) to test.

    Attributes:
        tag: Text found to the left of the operator in the filter expression.
        op: One of the strings listed in ``OPERATORS``.
        values: List of strings. May hold several alternatives for ``=``;
            holds exactly one entry for every other operator.
    """

    def __init__(self, tag, op, values):
        self.tag = tag
        self.op = op
        self.values = values  # list for '=' OR values; length 1 otherwise

    def __repr__(self):
        return f"<TagCondition {self.tag} {self.op} {self.values} >"


# Operators recognised inside a single filter expression.
OPERATORS = ["!=", ">=", "<=", "=", ">", "<", "~"]


def parse_filter_string(filter_str):
    """Parse a comma-separated filter string into ``TagCondition`` objects.

    Each non-empty, comma-separated part must look like ``<tag><op><value>``
    where ``<op>`` is one of ``OPERATORS``.  Surrounding whitespace and
    single/double quotes are stripped from both the tag and the value.  For
    the ``=`` operator the value may list alternatives separated by ``/``
    (e.g. ``Modality=CT/MR/US``).

    The operator that appears *earliest* in the part is the one used; when two
    operators start at the same position the longer one wins (``>=`` over
    ``>``).  This keeps operator characters inside the value intact, e.g. the
    regex in ``PatientName~^A=B``.

    NOTE: parts are split on ``,`` before anything else, so a value (including
    a regex) cannot itself contain a comma.

    Args:
        filter_str: The raw filter expression string.

    Returns:
        A list of ``TagCondition`` objects, in the order written.  An empty or
        whitespace-only string yields an empty list.

    Raises:
        ValueError: If a part contains no operator or has an empty tag.
    """
    conditions = []

    for part in (piece.strip() for piece in filter_str.split(",")):
        if not part:
            continue

        # (position, -length, operator): min() picks the earliest operator and,
        # on a tie, the longest one.
        hits = [(part.find(candidate), -len(candidate), candidate)
                for candidate in OPERATORS if candidate in part]
        if not hits:
            raise ValueError(f"Invalid filter expression: {part} ")
        op = min(hits)[2]

        tag, value = part.split(op, 1)
        tag = tag.strip().strip('"').strip("'")
        value = value.strip().strip('"').strip("'")
        if not tag:
            raise ValueError(f"Invalid filter expression: {part} ")

        # support OR-values for '=' operator: CT/MR/US
        if op == "=" and "/" in value:
            # Drop empty alternatives: an empty string is a substring of
            # everything and would make the condition match any element.
            values = [alt.strip() for alt in value.split("/") if alt.strip()] or [value]
        else:
            values = [value]

        conditions.append(TagCondition(tag, op, values))

    return conditions
90+
def passes_filters(ds, conditions):
    """Return True only if the dataset satisfies every condition.

    Conditions are checked in order and the first failure returns False.
    A line describing each check and its outcome is printed.

    Matching rules per operator:
      * ``=``  : at least one of ``cond.values`` is a substring of ``str(elem)``.
      * ``!=`` : none of ``cond.values`` is a substring of ``str(elem)``.
      * ``>``, ``<``, ``>=``, ``<=`` : ``str(elem.value)`` and
        ``cond.values[0]`` are both converted with ``float`` and compared;
        a value that cannot be converted fails the condition.
      * ``~``  : ``re.search(cond.values[0], str(elem))`` finds a match.

    Args:
        ds: Object exposing ``data_element(tag)``; the returned element is
            rendered with ``str()`` and its ``.value`` is read.
        conditions: Iterable of objects with ``tag``, ``op`` and ``values``.

    Returns:
        True if all conditions pass (or there are none), otherwise False.

    Raises:
        re.error: If a ``~`` condition carries an invalid regular expression.
    """
    for cond in conditions:
        try:
            elem = ds.data_element(cond.tag)
            if elem is None:
                # A lookup that yields no element is treated exactly like a
                # lookup that raises: the tag is missing.
                raise KeyError(cond.tag)
            actual_full = str(elem)  # FULL element string
        except Exception:
            print(f"[{cond.tag} ] MISSING TAG → fails condition {cond} ")
            return False

        # Value-only text, used for the numeric comparisons.
        try:
            actual_value_only = str(elem.value)
        except Exception:
            actual_value_only = actual_full  # fallback

        # Expected string for printing
        expected_str = "/".join(cond.values) if cond.op == "=" else cond.values[0]

        print(f"[{cond.tag} ] expected: {cond.op} {expected_str} | actual: {actual_full} ")

        # 1) Substring / OR matching against the full element string
        if cond.op == "=":
            if not any(v in actual_full for v in cond.values):
                print(" -> FAIL (substring not found in element)")
                return False
            print(" -> OK")

        # 2) Negated substring match against the full element string
        elif cond.op == "!=":
            if any(v in actual_full for v in cond.values):
                print(" -> FAIL (excluded substring found in element)")
                return False
            print(" -> OK")

        # 3) Numeric comparisons (value only, not the full element string)
        elif cond.op in (">", "<", ">=", "<="):
            try:
                actual_num = float(actual_value_only)
                expected_num = float(cond.values[0])
            except ValueError:
                print(" -> FAIL (cannot extract numeric value)")
                return False

            # Compare directly instead of eval()-ing a formatted string:
            # float text such as "nan" or "inf" is not a valid Python
            # expression, and eval is unnecessary here.
            outcomes = {
                ">": actual_num > expected_num,
                "<": actual_num < expected_num,
                ">=": actual_num >= expected_num,
                "<=": actual_num <= expected_num,
            }
            result = outcomes[cond.op]
            print(f" -> {'OK' if result else 'FAIL'} ")
            if not result:
                return False

        # 4) Regex search over the full element string
        elif cond.op == "~":
            result = bool(re.search(cond.values[0], actual_full))
            print(f" -> {'OK' if result else 'FAIL'} ")
            if not result:
                return False

    return True
164+
def extract_text_from_pixeldata(ds):
    """Return OCR-ed text from pixel data, or '' if unreadable.

    The pixel array is handed to PIL and then to ``pytesseract``.  Any failure
    along the way (decoding, image conversion, OCR) is reported with a printed
    "OCR error" line and results in an empty string rather than an exception.

    Args:
        ds: Dataset-like object supporting ``'PixelData' in ds`` and exposing
            ``pixel_array``.

    Returns:
        The recognised text with surrounding whitespace stripped, or ``""``
        when there is no pixel data, the array is not 2-D or 3-D, or an error
        occurred.
    """
    try:
        if 'PixelData' not in ds:
            return ""

        arr = ds.pixel_array

        # Only 2-D and 3-D arrays are passed to PIL; both take the same
        # conversion path.
        if arr.ndim not in (2, 3):
            return ""

        img = Image.fromarray(arr)
        text = pytesseract.image_to_string(img)
        return text.strip()

    except Exception as e:
        print(f"OCR error: {e} ")
        return ""
187+
188+
189+
190+
46191
47192# The main function of this *ChRIS* plugin is denoted by this ``@chris_plugin`` "decorator."
48193# Some metadata about the plugin is specified here. There is more metadata specified in setup.py.
56201 min_cpu_limit = '1000m' , # millicores, e.g. "1000m" = 1 CPU core
57202 min_gpu_limit = 0 # set min_gpu_limit=1 to enable GPU
58203)
59- @pflog .tel_logTime (
60- event = 'dicom_filter' ,
61- log = 'Filter dicom files'
62- )
63204def main (options : Namespace , inputdir : Path , outputdir : Path ):
64205 """
65206 *ChRIS* plugins usually have two positional arguments: an **input directory** containing
@@ -74,9 +215,16 @@ def main(options: Namespace, inputdir: Path, outputdir: Path):
74215 print (DISPLAY_TITLE )
75216
76217 mapper = PathMapper .file_mapper (inputdir , outputdir , glob = f"**/*.{ options .fileFilter } " ,fail_if_empty = False )
218+
219+ # Exit if minimum image count is not met
220+ if len (mapper )< options .minImgCount :
221+ print (f"Total no. of images found ({ len (mapper )} ) is less than { options .minImgCount } . Exiting analysis.." )
222+ return
223+ print (f"Total no. of images found: { len (mapper )} " )
224+
77225 for input_file , output_file in mapper :
78226 # Read each input file from the input directory that matches the input filter specified
79- dcm_img = read_input_dicom (input_file , options .dicomFilter , options .exclude )
227+ dcm_img = read_input_dicom (input_file , options .dicomFilter , options .textInspect )
80228
81229 # check if a valid image file is returned
82230 if dcm_img is None :
@@ -107,46 +255,38 @@ def save_as_image(dcm_file, output_file_path, file_ext):
107255 cv2 .imwrite (output_file_path ,cv2 .cvtColor (pixel_array_numpy ,cv2 .COLOR_RGB2BGR ))
108256
109257
110-
111-
112-
def read_input_dicom(input_file_path, filter_expression, inspect_text):
    """Read one DICOM file and return it only if it matches the filter.

    Steps:
      1) Parse ``filter_expression`` into conditions.
      2) Read the DICOM file, including pixel data.
      3) Check the dataset against the conditions.
      4) Optionally print the text found in the image by OCR.

    Args:
        input_file_path: Path-like object for the file; its ``name`` is
            printed and ``str()`` of it is passed to the DICOM reader.
        filter_expression: Filter string understood by ``parse_filter_string``.
        inspect_text: When true, the OCR text of the pixel data is printed.
            This happens whether or not the dataset matched the filter.

    Returns:
        The dataset when every condition passes; ``None`` when the file cannot
        be read, has no pixel data, or does not match.

    Raises:
        ValueError: Propagated from ``parse_filter_string`` when the filter
            expression is malformed.
    """
    conditions = parse_filter_string(filter_expression)

    # Read DICOM
    try:
        print(f"Reading input file: {input_file_path.name} ")
        ds = dicom.dcmread(str(input_file_path), stop_before_pixels=False)

        if 'PixelData' not in ds:
            print("No pixel data in this DICOM.")
            return None

    except Exception as ex:
        print(f"Unable to read dicom file: {ex} ")
        return None

    # Apply filters with verbose output
    print(f"\n Applying filter: {filter_expression} ")
    match = passes_filters(ds, conditions)
    print(f"Result: {'MATCH' if match else 'NO MATCH'} \n ")

    if inspect_text:
        print(extract_text_from_pixeldata(ds))

    return ds if match else None
150290
151291
152292def save_dicom (dicom_file , output_path ):
0 commit comments