88
99import cocoindex
1010
11+
# Plain data record for a contact person. All three fields are required
# strings; the class itself performs no validation or normalisation.
# NOTE(review): `relationship` presumably means the contact's relationship
# to the patient -- confirm against the intake form.
@dataclasses.dataclass
class Contact:
    name: str
    phone: str  # kept as text; no phone-number format is enforced here
    relationship: str
1617
18+
# Plain data record for a postal address. Every component is a required
# string and is stored exactly as given (no validation in this class).
@dataclasses.dataclass
class Address:
    street: str
    city: str
    state: str
    zip_code: str  # text rather than int, so leading zeros are preserved
2325
26+
# Plain data record for a pharmacy: a name, a phone number (as text) and a
# nested `Address` record. All fields are required.
@dataclasses.dataclass
class Pharmacy:
    name: str
    phone: str
    address: Address  # structured address, not a single free-form string
2932
33+
3034@dataclasses .dataclass
3135class Insurance :
3236 provider : str
@@ -35,25 +39,30 @@ class Insurance:
3539 policyholder_name : str
3640 relationship_to_patient : str
3741
42+
# Plain data record pairing a condition name with a boolean flag.
# NOTE(review): `diagnosed` presumably records whether the condition was
# formally diagnosed (as opposed to self-reported) -- confirm.
@dataclasses.dataclass
class Condition:
    name: str
    diagnosed: bool
4247
48+
# Plain data record for a medication: its name and its dosage.
# Both fields are required strings.
@dataclasses.dataclass
class Medication:
    name: str
    dosage: str  # free-form text; no unit or numeric parsing happens here
4753
54+
# Plain data record for an allergy; it carries only the allergy's name.
@dataclasses.dataclass
class Allergy:
    name: str
5158
59+
# Plain data record for a surgery: its name and when it took place.
@dataclasses.dataclass
class Surgery:
    name: str
    date: str  # stored as text, not datetime.date; no format is enforced here
5664
65+
5766@dataclasses .dataclass
5867class Patient :
5968 name : str
@@ -80,6 +89,7 @@ class Patient:
# Function spec with no configuration fields of its own. The flow in this
# file instantiates it as `ToMarkdown()` and passes it to `.transform(...)`
# on a document's content.
class ToMarkdown(cocoindex.op.FunctionSpec):
    """Convert a document to markdown."""
8291
92+
8393@cocoindex .op .executor_class (gpu = True , cache = True , behavior_version = 1 )
8494class ToMarkdownExecutor :
8595 """Executor for ToMarkdown."""
@@ -99,25 +109,33 @@ def __call__(self, content: bytes, filename: str) -> str:
99109 text = self ._converter .convert (temp_file .name ).text_content
100110 return text
101111
112+
102113@cocoindex .flow_def (name = "PatientIntakeExtraction" )
103- def patient_intake_extraction_flow (flow_builder : cocoindex .FlowBuilder , data_scope : cocoindex .DataScope ):
114+ def patient_intake_extraction_flow (
115+ flow_builder : cocoindex .FlowBuilder , data_scope : cocoindex .DataScope
116+ ):
104117 """
105118 Define a flow that extracts patient information from intake forms.
106119 """
107120 data_scope ["documents" ] = flow_builder .add_source (
108- cocoindex .sources .LocalFile (path = "data/patient_forms" , binary = True ))
121+ cocoindex .sources .LocalFile (path = "data/patient_forms" , binary = True )
122+ )
109123
110124 patients_index = data_scope .add_collector ()
111125
112126 with data_scope ["documents" ].row () as doc :
113-
114- doc ["markdown" ] = doc ["content" ].transform (ToMarkdown (), filename = doc ["filename" ])
127+ doc ["markdown" ] = doc ["content" ].transform (
128+ ToMarkdown (), filename = doc ["filename" ]
129+ )
115130 doc ["patient_info" ] = doc ["markdown" ].transform (
116131 cocoindex .functions .ExtractByLlm (
117132 llm_spec = cocoindex .LlmSpec (
118- api_type = cocoindex .LlmApiType .OPENAI , model = "gpt-4o" ),
133+ api_type = cocoindex .LlmApiType .OPENAI , model = "gpt-4o"
134+ ),
119135 output_type = Patient ,
120- instruction = "Please extract patient information from the intake form." ))
136+ instruction = "Please extract patient information from the intake form." ,
137+ )
138+ )
121139 patients_index .collect (
122140 filename = doc ["filename" ],
123141 patient_info = doc ["patient_info" ],
0 commit comments