refactor: change clean_vector_db default value to False

r3v5 · r3v5 · commit e344aefbfdfd · 2025-07-15T13:20:26.000+01:00
diff --git a/demos/kfp/docling/asr-conversion/README.md b/demos/kfp/docling/asr-conversion/README.md
@@ -106,7 +106,7 @@ The pipeline enables rich RAG applications that can answer questions about spoke
 -  `embed_model_id`: Embedding model to use (default: `ibm-granite/granite-embedding-125m-english`)
 -  `max_tokens`: Maximum tokens per chunk (default: 512)
 -  `use_gpu`: Whether to use GPU for processing (default: true)
--  `clean_vector_db`: if True, the vector database will be cleared during running the pipeline
+-  `clean_vector_db`: Whether to clear the vector database when the pipeline runs (default: false)
 
   
 ### Creating the Pipeline for running on GPU node
diff --git a/demos/kfp/docling/asr-conversion/docling_asr_convert_pipeline.py b/demos/kfp/docling/asr-conversion/docling_asr_convert_pipeline.py
@@ -489,7 +489,7 @@ def docling_convert_pipeline(
     embed_model_id: str = "ibm-granite/granite-embedding-125m-english",
     max_tokens: int = 512,
     use_gpu: bool = True,  # use only if you have additional gpu worker
-    clean_vector_db: bool = True,  # if True, the vector database will be cleared during running the pipeline
+    clean_vector_db: bool = False,  # if True, the vector database will be cleared during running the pipeline
 ) -> None:
     """
     Converts audio recordings to text using Docling ASR and generates embeddings
diff --git a/demos/kfp/docling/asr-conversion/docling_asr_convert_pipeline_compiled.yaml b/demos/kfp/docling/asr-conversion/docling_asr_convert_pipeline_compiled.yaml
@@ -4,7 +4,7 @@
 # Inputs:
 #    audio_filenames: str [Default: 'RAG_use_cases.wav, RAG_customers.wav, RAG_benefits.m4a, RAG_vs_Regular_LLM_Output.m4a']
 #    base_url: str [Default: 'https://raw.githubusercontent.com/opendatahub-io/rag/main/demos/testing-data/audio-speech']
-#    clean_vector_db: bool [Default: True]
+#    clean_vector_db: bool [Default: False]
 #    embed_model_id: str [Default: 'ibm-granite/granite-embedding-125m-english']
 #    max_tokens: int [Default: 512.0]
 #    num_workers: int [Default: 1.0]
@@ -2070,7 +2070,7 @@ root:
         isOptional: true
         parameterType: STRING
       clean_vector_db:
-        defaultValue: true
+        defaultValue: false
         description: boolean to enable/disable clearing the vector database before
           running the pipeline
         isOptional: true
diff --git a/demos/kfp/docling/asr-conversion/rag-agent/asr_rag_agent.ipynb b/demos/kfp/docling/asr-conversion/rag-agent/asr_rag_agent.ipynb