Skip to content

Commit 27fc288

Browse files
committed
bug fix and optimization
1 parent f26ee5e commit 27fc288

File tree

2 files changed

+43
-13
lines changed

2 files changed

+43
-13
lines changed

dicom_filter.py

Lines changed: 40 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
import pydicom as dicom
88
import cv2
99
import json
10-
# from pflog import pflog
1110
from pydicom.pixel_data_handlers import convert_color_space
1211
import numpy as np
1312
import re
@@ -35,7 +34,7 @@
3534
help='comma separated dicom tags with values')
3635
parser.add_argument('-f', '--fileFilter', default='dcm', type=str,
3736
help='input file filter glob')
38-
parser.add_argument('-m', '--minImgCount', default='1', type=int,
37+
parser.add_argument('-m', '--minImgCount', default=1, type=int,
3938
help='A configurable threshold—any series with fewer images is dropped.')
4039
parser.add_argument('-V', '--version', action='version',
4140
version=f'%(prog)s {__version__}')
@@ -162,6 +161,31 @@ def passes_filters(ds, conditions):
162161

163162
return True
164163

164+
def split_text(text, max_len=50):
    """
    Split text into lines of at most `max_len` characters, preserving words.

    The text is tokenised on runs of whitespace and the words are greedily
    packed into lines, joined by single spaces. Words are never broken: a
    word longer than `max_len` is placed on a line of its own, so that one
    line may exceed `max_len`.

    Args:
        text: The string to wrap. Any whitespace (including newlines) is
            treated as a word separator.
        max_len: Maximum length of a line, counting the joining spaces.

    Returns:
        A list of lines in order. Empty or whitespace-only input yields an
        empty list; no element of the list is ever an empty string.
    """
    lines = []
    current_line = ""

    for word in text.split():
        if not current_line:
            # The first word on a line is always accepted. No separator is
            # counted here, so a word of exactly `max_len` characters fits,
            # and an over-long word does not flush an empty line first.
            current_line = word
        elif len(current_line) + 1 + len(word) <= max_len:
            # +1 accounts for the single space that joins the two words.
            current_line += " " + word
        else:
            lines.append(current_line)
            current_line = word

    # Add the last line
    if current_line:
        lines.append(current_line)

    return lines
188+
165189
def extract_text_from_pixeldata(ds):
166190
"""Return OCR-ed text from pixel data, or '' if unreadable."""
167191
try:
@@ -179,16 +203,14 @@ def extract_text_from_pixeldata(ds):
179203
return ""
180204

181205
text = pytesseract.image_to_string(img)
182-
return text.strip()
206+
clean_text = " ".join(text.splitlines())
207+
lines = split_text(clean_text)
208+
return lines
183209

184210
except Exception as e:
185211
print(f"OCR error: {e}")
186212
return ""
187213

188-
189-
190-
191-
192214
# The main function of this *ChRIS* plugin is denoted by this ``@chris_plugin`` "decorator."
193215
# Some metadata about the plugin is specified here. There is more metadata specified in setup.py.
194216
#
@@ -218,7 +240,7 @@ def main(options: Namespace, inputdir: Path, outputdir: Path):
218240

219241
# Exit if minimum image count is not met
220242
if len(mapper)<options.minImgCount:
221-
print(f"Total no. of images found ({len(mapper)}) is less than {options.minImgCount}. Exiting analysis..")
243+
print(f"Total no. of images found ({len(mapper)}) is less than specified ({options.minImgCount}). Exiting analysis..")
222244
return
223245
print(f"Total no. of images found: {len(mapper)}")
224246

@@ -281,8 +303,16 @@ def read_input_dicom(input_file_path, filter_expression, inspect_text):
281303
match = passes_filters(ds, conditions)
282304
print(f"Result: {'MATCH' if match else 'NO MATCH'}\n")
283305

284-
if inspect_text:
285-
print(extract_text_from_pixeldata(ds))
306+
# Run OCR if inspect_text == TRUE
307+
if inspect_text and extract_text_from_pixeldata(ds):
308+
lines_text = '\n'.join(extract_text_from_pixeldata(ds))
309+
print(
310+
f"\n########################## Detected Text #######################################"
311+
f"\n{lines_text}"
312+
f"\n################################################################################\n"
313+
)
314+
315+
return None
286316

287317
return ds if match else None
288318

requirements.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
chris_plugin>=0.3.0
22
opencv-python
33
pydicom
4-
# pflog==1.2.26
5-
# pftel-client~=1.0.6
4+
65
# for bug fix on transfer syntax errors
76
pylibjpeg
87
pylibjpeg-libjpeg
98
python-gdcm
9+
1010
# for running OCR
1111
pillow
12-
pytesseract
12+
pytesseract

0 commit comments

Comments
 (0)