@@ -48,48 +48,13 @@ class ClassInfo:
4848 methods : cocoindex .typing .List [MethodInfo ]
4949
@dataclasses.dataclass
class ModuleInfo:
    """Information about a Python module, as extracted from a manual.

    Previously named ``ManualInfo``; only the class name changed, the four
    fields are the same.
    """

    # Title and free-text description of the module.
    title: str
    description: str
    # Classes described for the module; each ClassInfo carries its own methods.
    classes: cocoindex.typing.Table[ClassInfo]
    # Methods listed directly on the module rather than under one of `classes`.
    # NOTE(review): presumably the module-level functions — confirm.
    methods: cocoindex.typing.Table[MethodInfo]
5656
5757
58- class ExtractManual (cocoindex .op .FunctionSpec ):
59- """Extract manual information from a Markdown."""
60-
61- @cocoindex .op .executor_class ()
62- class ExtractManualExecutor :
63- """Executor for ExtractManual."""
64-
65- spec : ExtractManual
66-
67- def __call__ (self , _markdown : str ) -> ManualInfo :
68- return ManualInfo (
69- title = "title_placeholder" ,
70- description = "description_placeholder" ,
71- classes = [
72- ClassInfo (
73- name = "class_name_placeholder" ,
74- description = "class_description_placeholder" ,
75- methods = [
76- MethodInfo (
77- name = "method_name_placeholder" ,
78- args = [ArgInfo (name = "arg_name_placeholder" , description = "arg_description_placeholder" )],
79- description = "method_description_placeholder"
80- )
81- ]
82- )
83- ],
84- methods = [
85- MethodInfo (
86- name = "method_name_placeholder" ,
87- args = [ArgInfo (name = "arg_name_placeholder" , description = "arg_description_placeholder" )],
88- description = "method_description_placeholder"
89- )
90- ]
91- )
92-
9358class CleanUpManual (cocoindex .op .FunctionSpec ):
9459 """Clean up manual information."""
9560
@@ -101,9 +66,9 @@ class CleanUpManualExecutor:
10166
10267 spec : CleanUpManual
10368
def __call__(self, module_info: ModuleInfo) -> ModuleInfo | None:
    """Return the cleaned-up module information.

    Currently a pass-through: ``module_info`` is returned unchanged, so the
    ``None`` allowed by the return annotation is never produced here.
    """
    # TODO: Clean up
    return module_info
10772
10873@cocoindex .flow_def (name = "ManualExtraction" )
10974def manual_extraction_flow (flow_builder : cocoindex .FlowBuilder , data_scope : cocoindex .DataScope ):
@@ -116,9 +81,15 @@ def manual_extraction_flow(flow_builder: cocoindex.FlowBuilder, data_scope: coco
11681
11782 with data_scope ["documents" ].row () as doc :
11883 doc ["markdown" ] = doc ["content" ].transform (PdfToMarkdown ())
119- doc ["raw_manual_info" ] = doc ["markdown" ].transform (ExtractManual ())
120- doc ["manual_info" ] = doc ["raw_manual_info" ].transform (CleanUpManual ())
121- manual_infos .collect (filename = doc ["filename" ], manual_info = doc ["manual_info" ])
84+ doc ["raw_module_info" ] = doc ["markdown" ].transform (
85+ cocoindex .functions .ExtractByMistral (
86+ model = cocoindex .functions .MistralModelSpec (
87+ model_id = "microsoft/Phi-3.5-mini-instruct" ,
88+ isq_type = "Q8_0" ),
89+ output_type = cocoindex .typing .encode_enriched_type (ModuleInfo ),
90+ instructions = "Please extract Python module information from the manual." ))
91+ doc ["module_info" ] = doc ["raw_module_info" ].transform (CleanUpManual ())
92+ manual_infos .collect (filename = doc ["filename" ], module_info = doc ["module_info" ])
12293
12394 manual_infos .export (
12495 "manual_infos" ,
0 commit comments