Skip to content

Commit c5e3f57

Browse files
author
tony.liu
committed
use llm for structuring
1 parent dc3a94d commit c5e3f57

File tree

3 files changed

+216
-145
lines changed

3 files changed

+216
-145
lines changed

transcription/qwen.py

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,24 @@ def map_to_logbook_template(json_output):
1717
with open("templates/modified_templates.json", 'r') as f:
1818
modified_templates = json.load(f)
1919

20-
# Get the field names in order
21-
modified_fields = list(modified_templates["adult_cardiac_log_2"].keys())
22-
logbook_fields = list(logbook_templates["Adult_cardiac_log_2"].keys())
20+
# Initialize result dictionary with all fields from both templates set to None
21+
result = {}
22+
result.update({key: None for key in logbook_templates["Adult_cardiac_log"].keys()})
23+
result.update({key: None for key in logbook_templates["Adult_cardiac_log_2"].keys()})
2324

24-
# Create result dictionary with all fields initialized to None
25-
result = {key: None for key in logbook_fields}
25+
# Map fields for adult_cardiac_log
26+
modified_fields_1 = list(modified_templates["Adult_cardiac_log"].keys())
27+
logbook_fields_1 = list(logbook_templates["Adult_cardiac_log"].keys())
2628

27-
# Map fields by position
28-
for modified_field, logbook_field in zip(modified_fields, logbook_fields):
29+
for modified_field, logbook_field in zip(modified_fields_1, logbook_fields_1):
30+
if modified_field in json_output:
31+
result[logbook_field] = json_output[modified_field].replace("###SECTION###", "")
32+
33+
# Map fields for adult_cardiac_log_2
34+
modified_fields_2 = list(modified_templates["adult_cardiac_log_2"].keys())
35+
logbook_fields_2 = list(logbook_templates["Adult_cardiac_log_2"].keys())
36+
37+
for modified_field, logbook_field in zip(modified_fields_2, logbook_fields_2):
2938
if modified_field in json_output:
3039
result[logbook_field] = json_output[modified_field].replace("###SECTION###", "")
3140

@@ -42,6 +51,21 @@ def process_image(image_path):
4251

4352
def qwen(image_paths=["../assets/kkl3.jpg", "../assets/kkl2.jpg"]):
4453
try:
54+
# Load templates first
55+
with open("templates/modified_templates.json", 'r') as f:
56+
modified_templates = json.load(f)
57+
58+
# Get all field names from both templates
59+
field_names = []
60+
field_names.extend(list(modified_templates["Adult_cardiac_log"].keys()))
61+
field_names.extend(list(modified_templates["adult_cardiac_log_2"].keys()))
62+
63+
if not field_names:
64+
raise ValueError("No field names found in templates")
65+
66+
# Ensure field names are unique
67+
field_names = list(dict.fromkeys(field_names))
68+
4569
# Load the model and processor with explicit trust_remote_code
4670
model_name = "Qwen/Qwen2.5-VL-3B-Instruct"
4771

@@ -188,9 +212,9 @@ def qwen(image_paths=["../assets/kkl3.jpg", "../assets/kkl2.jpg"]):
188212
section_order = ["basics", "case_details", "hpi", "social", "PMHx", "medications", "allergies", "exam", "veins", "allen_test", "INVx", "CXR/CT", "surgical_plan", "flags", "operative_notes", "post_op_notes", "learning_points"]
189213
combined_text = SECTION_SEPARATOR.join(all_transcribed_sections.get(section, "") for section in section_order)
190214

191-
# Convert to JSON using the existing parser
192-
json_output = process_text_file(combined_text)
193-
215+
# Convert to JSON using the text processor with field names
216+
json_output = process_text_file(combined_text, field_names)
217+
json_output['type'] = 'adult_cardiac_logs'
194218
# Map to logbook template format
195219
final_output = map_to_logbook_template(json_output)
196220

transcription/templates/modified_templates.json

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,58 @@
11
{
2+
"Adult_cardiac_log": {
3+
"Case No": null,
4+
"Type": null,
5+
"Patient ID": null,
6+
"Age": null,
7+
"Surgeon": null,
8+
"OR Date": null,
9+
"Indication for Surgery/Reason for Referral": null,
10+
"HPI": null,
11+
"PMHx HTM": null,
12+
"PMHx DM2": null,
13+
"PMHx DLP": null,
14+
"PMHx CVA": null,
15+
"Gender": null,
16+
"Social EtOH": null,
17+
"Smoking": null,
18+
"Drugs": null,
19+
"Allergies": null,
20+
"Meds [last dose]": null,
21+
"exam_weight": null,
22+
"exam_height": null,
23+
"exam_bmi": null,
24+
"Veins": null,
25+
"Allen Test": null,
26+
"Echo EF": null,
27+
"RVFx": null,
28+
"WMA": null,
29+
"Aorta": null,
30+
"Valves": null,
31+
"CXR": null,
32+
"exam_pulses_top_left": null,
33+
"exam_pulses_top_right": null,
34+
"exam_pulses_bottom_left": null,
35+
"exam_pulses_bottom_right": null,
36+
"invx_echo": null,
37+
"invx_hb": null,
38+
"invx_w": null,
39+
"invx_pl": null,
40+
"labs_w": null,
41+
"labs_hb": null,
42+
"labs_plt": null,
43+
"labs_hct": null,
44+
"labs_na": null,
45+
"labs_cl": null,
46+
"labs_bun": null,
47+
"labs_glu": null,
48+
"labs_creat": null,
49+
"labs_co2": null,
50+
"labs_k": null,
51+
"cath_image": null,
52+
"cath_text": null,
53+
"ct_image": null,
54+
"CT": null
55+
},
256
"adult_cardiac_log_2": {
357
"Surgical Plan": "",
458
"1 Operator": "",

0 commit comments

Comments
 (0)