77import pydicom as dicom
88import cv2
99import json
10- # from pflog import pflog
1110from pydicom .pixel_data_handlers import convert_color_space
1211import numpy as np
1312import re
3534 help = 'comma separated dicom tags with values' )
3635parser .add_argument ('-f' , '--fileFilter' , default = 'dcm' , type = str ,
3736 help = 'input file filter glob' )
38- parser .add_argument ('-m' , '--minImgCount' , default = '1' , type = int ,
37+ parser .add_argument ('-m' , '--minImgCount' , default = 1 , type = int ,
3938 help = 'A configurable threshold—any series with fewer images is dropped.' )
4039parser .add_argument ('-V' , '--version' , action = 'version' ,
4140 version = f'%(prog)s { __version__ } ' )
@@ -162,6 +161,31 @@ def passes_filters(ds, conditions):
162161
163162 return True
164163
def split_text(text, max_len=50):
    """
    Split text into lines of at most `max_len` characters, preserving words.

    The text is tokenised on runs of whitespace, and words are packed greedily
    onto each line, separated by a single space. Words are never broken: a
    single word longer than `max_len` is placed on a line of its own, so that
    line may exceed `max_len`.

    Args:
        text: The string to wrap.
        max_len: Maximum desired line length, counting the joining spaces.

    Returns:
        A list of non-empty line strings; an empty list if `text` contains no
        words.
    """
    lines = []
    current_line = ""

    for word in text.split():
        if not current_line:
            # The first word on a line is always accepted. No separator is
            # counted here, so an over-long (or exactly max_len) first word
            # no longer causes an empty line to be emitted before it.
            current_line = word
        elif len(current_line) + 1 + len(word) <= max_len:
            # Word still fits once the joining space is accounted for.
            current_line += " " + word
        else:
            # Line is full: flush it and start a new one with this word.
            lines.append(current_line)
            current_line = word

    # Flush the trailing, partially filled line.
    if current_line:
        lines.append(current_line)

    return lines
188+
165189def extract_text_from_pixeldata (ds ):
166190 """Return OCR-ed text from pixel data, or '' if unreadable."""
167191 try :
@@ -179,16 +203,14 @@ def extract_text_from_pixeldata(ds):
179203 return ""
180204
181205 text = pytesseract .image_to_string (img )
182- return text .strip ()
206+ clean_text = " " .join (text .splitlines ())
207+ lines = split_text (clean_text )
208+ return lines
183209
184210 except Exception as e :
185211 print (f"OCR error: { e } " )
186212 return ""
187213
188-
189-
190-
191-
192214# The main function of this *ChRIS* plugin is denoted by this ``@chris_plugin`` "decorator."
193215# Some metadata about the plugin is specified here. There is more metadata specified in setup.py.
194216#
@@ -218,7 +240,7 @@ def main(options: Namespace, inputdir: Path, outputdir: Path):
218240
219241 # Exit if minimum image count is not met
220242 if len (mapper )< options .minImgCount :
221- print (f"Total no. of images found ({ len (mapper )} ) is less than { options .minImgCount } . Exiting analysis.." )
243+ print (f"Total no. of images found ({ len (mapper )} ) is less than specified ( { options .minImgCount } ) . Exiting analysis.." )
222244 return
223245 print (f"Total no. of images found: { len (mapper )} " )
224246
@@ -281,8 +303,16 @@ def read_input_dicom(input_file_path, filter_expression, inspect_text):
281303 match = passes_filters (ds , conditions )
282304 print (f"Result: { 'MATCH' if match else 'NO MATCH' } \n " )
283305
284- if inspect_text :
285- print (extract_text_from_pixeldata (ds ))
306+ # Run OCR if inspect_text == TRUE
307+ if inspect_text and extract_text_from_pixeldata (ds ):
308+ lines_text = '\n ' .join (extract_text_from_pixeldata (ds ))
309+ print (
310+ f"\n ########################## Detected Text #######################################"
311+ f"\n { lines_text } "
312+ f"\n ################################################################################\n "
313+ )
314+
315+ return None
286316
287317 return ds if match else None
288318
0 commit comments