Skip to content

Commit 7e847c3

Browse files
authored
Switch to ExtractByMistral in the manual_extraction example. #27 (#89)
Switch to `ExtractByMistral` in the `manual_extraction` example. This doesn't work yet - the engine side still needs some debugging.
1 parent 80f6071 commit 7e847c3

File tree

1 file changed

+12
-41
lines changed

1 file changed

+12
-41
lines changed

examples/manual_extraction/manual_extraction.py

Lines changed: 12 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -48,48 +48,13 @@ class ClassInfo:
4848
methods: cocoindex.typing.List[MethodInfo]
4949

5050
@dataclasses.dataclass
51-
class ManualInfo:
51+
class ModuleInfo:
5252
title: str
5353
description: str
5454
classes: cocoindex.typing.Table[ClassInfo]
5555
methods: cocoindex.typing.Table[MethodInfo]
5656

5757

58-
class ExtractManual(cocoindex.op.FunctionSpec):
59-
"""Extract manual information from a Markdown."""
60-
61-
@cocoindex.op.executor_class()
62-
class ExtractManualExecutor:
63-
"""Executor for ExtractManual."""
64-
65-
spec: ExtractManual
66-
67-
def __call__(self, _markdown: str) -> ManualInfo:
68-
return ManualInfo(
69-
title="title_placeholder",
70-
description="description_placeholder",
71-
classes=[
72-
ClassInfo(
73-
name="class_name_placeholder",
74-
description="class_description_placeholder",
75-
methods=[
76-
MethodInfo(
77-
name="method_name_placeholder",
78-
args=[ArgInfo(name="arg_name_placeholder", description="arg_description_placeholder")],
79-
description="method_description_placeholder"
80-
)
81-
]
82-
)
83-
],
84-
methods=[
85-
MethodInfo(
86-
name="method_name_placeholder",
87-
args=[ArgInfo(name="arg_name_placeholder", description="arg_description_placeholder")],
88-
description="method_description_placeholder"
89-
)
90-
]
91-
)
92-
9358
class CleanUpManual(cocoindex.op.FunctionSpec):
9459
"""Clean up manual information."""
9560

@@ -101,9 +66,9 @@ class CleanUpManualExecutor:
10166

10267
spec: CleanUpManual
10368

104-
def __call__(self, manual_info: ManualInfo) -> ManualInfo | None:
69+
def __call__(self, module_info: ModuleInfo) -> ModuleInfo | None:
10570
# TODO: Clean up
106-
return manual_info
71+
return module_info
10772

10873
@cocoindex.flow_def(name="ManualExtraction")
10974
def manual_extraction_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope):
@@ -116,9 +81,15 @@ def manual_extraction_flow(flow_builder: cocoindex.FlowBuilder, data_scope: coco
11681

11782
with data_scope["documents"].row() as doc:
11883
doc["markdown"] = doc["content"].transform(PdfToMarkdown())
119-
doc["raw_manual_info"] = doc["markdown"].transform(ExtractManual())
120-
doc["manual_info"] = doc["raw_manual_info"].transform(CleanUpManual())
121-
manual_infos.collect(filename=doc["filename"], manual_info=doc["manual_info"])
84+
doc["raw_module_info"] = doc["markdown"].transform(
85+
cocoindex.functions.ExtractByMistral(
86+
model=cocoindex.functions.MistralModelSpec(
87+
model_id="microsoft/Phi-3.5-mini-instruct",
88+
isq_type="Q8_0"),
89+
output_type=cocoindex.typing.encode_enriched_type(ModuleInfo),
90+
instructions="Please extract Python module information from the manual."))
91+
doc["module_info"] = doc["raw_module_info"].transform(CleanUpManual())
92+
manual_infos.collect(filename=doc["filename"], module_info=doc["module_info"])
12293

12394
manual_infos.export(
12495
"manual_infos",

0 commit comments

Comments
 (0)