Skip to content

Commit dce1ab7

Browse files
Merge pull request #20 from Bobbins228/reword-pdf-demo
docs: update wording in docling conversion/ingestion step
2 parents 839dae3 + 5f78c05 commit dce1ab7

File tree

2 files changed

+5
-5
lines changed

2 files changed

+5
-5
lines changed

demos/kfp/pdf-conversion/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ This pipeline converts your PDF documents to Markdown format using Docling, chun
3232
- Divides PDFs into batches for parallel processing
3333
- Configurable number of splits based on available workers
3434

35-
### 3. Docling Convert (`docling_convert`)
35+
### 3. Docling Convert and Ingest Data into Llama Stack's Vector Store (`docling_convert_and_ingest`)
3636

3737
- Converts PDFs to Markdown using Docling
3838
- Generates embeddings using sentence transformers

demos/kfp/pdf-conversion/docling_convert_pipeline.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -95,12 +95,12 @@ def create_pdf_splits(
9595
splits = [batch for batch in (all_pdfs[i::num_splits] for i in range(num_splits)) if batch]
9696
return splits or [[]]
9797

98-
98+
# This component converts PDFs to Markdown and ingests the embeddings into Llama Stack's vector store
9999
@dsl.component(
100100
base_image=PYTORCH_CUDA_IMAGE,
101101
packages_to_install=["docling", "transformers", "sentence-transformers", "llama-stack", "llama-stack-client", "pymilvus", "fire"],
102102
)
103-
def docling_convert(
103+
def docling_convert_and_ingest(
104104
input_path: dsl.InputPath("input-pdfs"),
105105
pdf_split: List[str],
106106
output_path: dsl.OutputPath("output-md"),
@@ -264,7 +264,7 @@ def docling_convert_pipeline(
264264

265265
with dsl.ParallelFor(pdf_splits.output) as pdf_split:
266266
with dsl.If(use_gpu == True):
267-
convert_task = docling_convert(
267+
convert_task = docling_convert_and_ingest(
268268
input_path=import_task.output,
269269
pdf_split=pdf_split,
270270
embed_model_id=embed_model_id,
@@ -282,7 +282,7 @@ def docling_convert_pipeline(
282282
add_toleration_json(convert_task, [{"effect": "NoSchedule", "key": "nvidia.com/gpu", "operator": "Exists"}])
283283
add_node_selector_json(convert_task, {})
284284
with dsl.Else():
285-
convert_task = docling_convert(
285+
convert_task = docling_convert_and_ingest(
286286
input_path=import_task.output,
287287
pdf_split=pdf_split,
288288
embed_model_id=embed_model_id,

0 commit comments

Comments
 (0)