|
| 1 | +from datetime import timedelta |
| 2 | +import os |
| 3 | +import dataclasses |
| 4 | + |
| 5 | +import cocoindex |
| 6 | +from markdown_it import MarkdownIt |
| 7 | + |
# Shared Markdown renderer, built once at import time with markdown-it's
# "gfm-like" preset and reused by `markdown_to_html` for every document.
_markdown_it = MarkdownIt("gfm-like")
| 9 | + |
| 10 | + |
class LocalFileTarget(cocoindex.op.TargetSpec):
    """Spec of the custom target: a local directory that receives HTML files."""

    # Path of the directory in which the exported HTML files are saved.
    directory: str
| 16 | + |
| 17 | + |
@dataclasses.dataclass
class LocalFileTargetValues:
    """Value fields of one exported row, as received by the connector's `mutate`."""

    # HTML text that is written to the row's output file.
    html: str
| 23 | + |
| 24 | + |
@cocoindex.op.target_connector(spec_cls=LocalFileTarget)
class LocalFileTargetConnector:
    """Target connector that stores every exported row as an `.html` file
    inside the directory named by a `LocalFileTarget` spec."""

    @staticmethod
    def get_persistent_key(spec: LocalFileTarget, target_name: str) -> str:
        """Use the directory path as the persistent key for this target."""
        return spec.directory

    @staticmethod
    def describe(key: str) -> str:
        """(Optional) Return a human-readable description of the target."""
        return f"Local directory {key}"

    @staticmethod
    def apply_setup_change(
        key: str, previous: LocalFileTarget | None, current: LocalFileTarget | None
    ) -> None:
        """
        Apply setup changes to the target.

        - `previous is None` and `current` is set: create the directory.
        - `previous` is set and `current is None`: remove the `.html` files
          found in the directory, then remove the directory itself if nothing
          else is left in it.

        Best practice: keep all actions idempotent.
        """

        # Create the directory if it didn't exist.
        if previous is None and current is not None:
            os.makedirs(current.directory, exist_ok=True)

        # The target is gone: clean up what this connector wrote.
        if previous is not None and current is None:
            directory = previous.directory
            if os.path.isdir(directory):
                # Only `.html` entries are removed; any other entry in the
                # directory is left untouched.
                for filename in os.listdir(directory):
                    if filename.endswith(".html"):
                        os.remove(os.path.join(directory, filename))
                # `os.rmdir` raises OSError on a non-empty directory, so only
                # remove it when no other entries remain.
                if not os.listdir(directory):
                    os.rmdir(directory)

    @staticmethod
    def prepare(spec: LocalFileTarget) -> LocalFileTarget:
        """
        (Optional) Prepare for execution. To run common operations before applying any mutations.
        The returned value will be passed as the first element of tuples in `mutate` method.

        If not provided, will directly pass the spec to `mutate` method.
        """
        return spec

    @staticmethod
    def mutate(
        *all_mutations: tuple[LocalFileTarget, dict[str, LocalFileTargetValues | None]],
    ) -> None:
        """
        Mutate the target.

        The first element of the tuple is the target spec.
        The second element is a dictionary of mutations:
        - The key is the filename, and the value is the mutation.
        - If the value is `None`, the file will be removed.
          Otherwise, the file will be written with the content.

        Files are written as UTF-8 regardless of the platform's default
        encoding.

        Best practice: keep all actions idempotent.
        """
        for spec, mutations in all_mutations:
            for filename, mutation in mutations.items():
                full_path = os.path.join(spec.directory, filename) + ".html"
                if mutation is None:
                    try:
                        os.remove(full_path)
                    except FileNotFoundError:
                        # Already gone: removing twice must be harmless.
                        pass
                else:
                    # Explicit encoding: the locale default may be unable to
                    # represent non-ASCII characters in the HTML.
                    with open(full_path, "w", encoding="utf-8") as f:
                        f.write(mutation.html)
| 95 | + |
| 96 | + |
@cocoindex.op.function()
def markdown_to_html(text: str) -> str:
    """Render Markdown source `text` to HTML using the module-level renderer."""
    rendered_html = _markdown_it.render(text)
    return rendered_html
| 100 | + |
| 101 | + |
@cocoindex.flow_def(name="CustomOutputFiles")
def custom_output_files(
    flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope
) -> None:
    """
    Build the example flow: take `*.md` files from the `data` directory,
    convert each one to HTML, and export the results through
    `LocalFileTarget` into `output_html`, keyed by filename.
    """
    # Source of Markdown documents, added with a 5-second refresh interval.
    markdown_source = cocoindex.sources.LocalFile(
        path="data", included_patterns=["*.md"]
    )
    data_scope["documents"] = flow_builder.add_source(
        markdown_source,
        refresh_interval=timedelta(seconds=5),
    )

    # Convert every document and collect (filename, html) rows.
    html_collector = data_scope.add_collector()
    with data_scope["documents"].row() as document:
        document["html"] = document["content"].transform(markdown_to_html)
        html_collector.collect(
            filename=document["filename"], html=document["html"]
        )

    # Export the collected rows to the custom local-file target.
    html_target = LocalFileTarget(directory="output_html")
    html_collector.export(
        "OutputHtml",
        html_target,
        primary_key_fields=["filename"],
    )
0 commit comments