docling-project · cau-git · Mar 17, 2026 · Mar 12, 2026
diff --git a/docling/pipeline/standard_pdf_pipeline.py b/docling/pipeline/standard_pdf_pipeline.py
@@ -506,10 +506,14 @@ def _init_models(self) -> None:
         self.reading_order_model = ReadingOrderModel(options=ReadingOrderOptions())
 
         # --- optional enrichment ------------------------------------------------
-        # Update code_formula_options to match the boolean flags
-        code_formula_opts = self.pipeline_options.code_formula_options
-        code_formula_opts.extract_code = self.pipeline_options.do_code_enrichment
-        code_formula_opts.extract_formulas = self.pipeline_options.do_formula_enrichment
+        # Create a copy to avoid mutating pipeline_options in-place,
+        # which would change its hash and break pipeline caching (#3109).
+        code_formula_opts = self.pipeline_options.code_formula_options.model_copy(
+            update={
+                "extract_code": self.pipeline_options.do_code_enrichment,
+                "extract_formulas": self.pipeline_options.do_formula_enrichment,
+            }
+        )
 
         self.enrichment_pipe = [
             # Code Formula Enrichment Model (using new VLM runtime system)

diff --git a/tests/test_options.py b/tests/test_options.py
@@ -191,6 +191,34 @@ def test_parser_backends(test_doc_path):
         assert doc_result.status == ConversionStatus.SUCCESS
 
 
+def test_pipeline_cache_after_initialize(test_doc_path):
+    """Check that a pre-initialized PDF pipeline is reused by convert().
+
+    Regression test for #3109: pipeline initialization used to mutate
+    code_formula_options in-place, which changed the options hash and made
+    a later convert() call miss the pipeline cache.
+    """
+    pdf_options = PdfPipelineOptions()
+    pdf_options.do_ocr = False
+    pdf_options.do_table_structure = False
+    pdf_format_option = PdfFormatOption(pipeline_options=pdf_options)
+
+    doc_converter = DocumentConverter(
+        format_options={InputFormat.PDF: pdf_format_option}
+    )
+
+    # Warm the cache explicitly; exactly one pipeline should exist afterwards.
+    doc_converter.initialize_pipeline(InputFormat.PDF)
+    assert len(doc_converter._get_initialized_pipelines()) == 1
+
+    # Converting must hit the cached pipeline instead of building another one.
+    doc_converter.convert(test_doc_path)
+    pipeline_count = len(doc_converter._get_initialized_pipelines())
+    assert pipeline_count == 1, (
+        "Pipeline should be reused from cache, not re-initialized"
+    )
+
+
 def test_confidence(test_doc_path):
     converter = DocumentConverter()
     doc_result: ConversionResult = converter.convert(test_doc_path, page_range=(6, 9))