forked from cocoindex-io/cocoindex
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
123 lines (97 loc) · 4 KB
/
main.py
File metadata and controls
123 lines (97 loc) · 4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
from datetime import timedelta
import os
import dataclasses
import cocoindex
from markdown_it import MarkdownIt
# Module-level Markdown renderer, constructed once with the "gfm-like" configuration
# and reused by `markdown_to_html` for every document.
_markdown_it = MarkdownIt("gfm-like")
class LocalFileTarget(cocoindex.op.TargetSpec):
    """Spec of the custom target that saves exported rows as HTML files."""

    # Directory into which the HTML files are saved.
    directory: str
@dataclasses.dataclass
class LocalFileTargetValues:
    """
    Value fields of one exported row.

    Instances are the non-`None` mutation values handed to
    `LocalFileTargetConnector.mutate`.
    """

    # Rendered HTML content to write to the output file.
    html: str
@cocoindex.op.target_connector(spec_cls=LocalFileTarget)
class LocalFileTargetConnector:
    """
    Connector for `LocalFileTarget`: stores every exported row as
    `<directory>/<filename>.html` on the local filesystem.
    """

    @staticmethod
    def get_persistent_key(spec: LocalFileTarget, target_name: str) -> str:
        """Use the directory path as the persistent key for this target."""
        return spec.directory

    @staticmethod
    def describe(key: str) -> str:
        """(Optional) Return a human-readable description of the target."""
        return f"Local directory {key}"

    @staticmethod
    def apply_setup_change(
        key: str, previous: LocalFileTarget | None, current: LocalFileTarget | None
    ) -> None:
        """
        Apply setup changes to the target.

        - `previous is None` and `current` set: the output directory is created.
        - `previous` set and `current is None`: the `.html` files in the
          directory are deleted, then the directory itself is removed if
          nothing else is left in it.

        Best practice: keep all actions idempotent.
        """
        # Create the directory if it didn't exist.
        if previous is None and current is not None:
            os.makedirs(current.directory, exist_ok=True)

        # Tear the target down when it no longer exists in the flow.
        if previous is not None and current is None:
            if os.path.isdir(previous.directory):
                for filename in os.listdir(previous.directory):
                    if filename.endswith(".html"):
                        try:
                            os.remove(os.path.join(previous.directory, filename))
                        except FileNotFoundError:
                            # Already gone; same tolerance as `mutate`.
                            pass
                # Only `.html` files are removed above, so the directory may
                # still hold unrelated files. `os.rmdir` raises on a non-empty
                # directory; leave it in place rather than failing teardown.
                if not os.listdir(previous.directory):
                    os.rmdir(previous.directory)

    @staticmethod
    def prepare(spec: LocalFileTarget) -> LocalFileTarget:
        """
        (Optional) Prepare for execution. To run common operations before applying any mutations.
        The returned value will be passed as the first element of tuples in `mutate` method.
        If not provided, will directly pass the spec to `mutate` method.
        """
        return spec

    @staticmethod
    def mutate(
        *all_mutations: tuple[LocalFileTarget, dict[str, LocalFileTargetValues | None]],
    ) -> None:
        """
        Mutate the target.

        The first element of the tuple is the target spec.
        The second element is a dictionary of mutations:
        - The key is the filename, and the value is the mutation.
        - If the value is `None`, the file will be removed.
          Otherwise, the file will be written with the content.

        Best practice: keep all actions idempotent.
        """
        for spec, mutations in all_mutations:
            for filename, mutation in mutations.items():
                full_path = os.path.join(spec.directory, filename) + ".html"
                if mutation is None:
                    try:
                        os.remove(full_path)
                    except FileNotFoundError:
                        # Deleting a file that is already absent is a no-op.
                        pass
                else:
                    # Write as UTF-8 explicitly: the platform default encoding
                    # may be unable to represent non-ASCII characters in the
                    # rendered HTML.
                    with open(full_path, "w", encoding="utf-8") as f:
                        f.write(mutation.html)
@cocoindex.op.function()
def markdown_to_html(text: str) -> str:
    """Render the Markdown source `text` with the module-level renderer and return the result."""
    rendered = _markdown_it.render(text)
    return rendered
@cocoindex.flow_def(name="CustomOutputFiles")
def custom_output_files(
    flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope
) -> None:
    """
    Example flow: take markdown files as input and export them as HTML files.
    """
    # Source: `*.md` files under `data`, with a refresh interval of 5 seconds.
    markdown_source = cocoindex.sources.LocalFile(
        path="data", included_patterns=["*.md"]
    )
    data_scope["documents"] = flow_builder.add_source(
        markdown_source,
        refresh_interval=timedelta(seconds=5),
    )

    html_collector = data_scope.add_collector()

    # Per document: convert its content to HTML and collect (filename, html).
    with data_scope["documents"].row() as document:
        document["html"] = document["content"].transform(markdown_to_html)
        html_collector.collect(filename=document["filename"], html=document["html"])

    # Export the collected rows through the custom local-file target,
    # keyed by filename.
    html_target = LocalFileTarget(directory="output_html")
    html_collector.export(
        "OutputHtml",
        html_target,
        primary_key_fields=["filename"],
    )