Improve test assertions for Camel Docling OCR

apupier · apupier · commit dcf7c01383c9 · 2026-01-22T17:02:08.000+01:00
Note that the footer text is not detected by the OCR, so the footer assertion is left commented out as a TODO.

Signed-off-by: Aurélien Pupier <apupier@ibm.com>
diff --git a/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/OcrExtractionIT.java b/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/OcrExtractionIT.java
@@ -48,7 +48,7 @@
  * This test demonstrates how to use Docling's OCR capabilities to extract text from images containing text content.
  */
 @DisabledIfSystemProperty(named = "ci.env.name", matches = ".*", disabledReason = "Too much resources on GitHub Actions")
-public class OcrExtractionIT extends CamelTestSupport {
+class OcrExtractionIT extends CamelTestSupport {
 
     private static final Logger LOG = LoggerFactory.getLogger(OcrExtractionIT.class);
 
@@ -76,7 +76,7 @@ protected CamelContext createCamelContext() throws Exception {
     }
 
     @Test
-    public void testOcrTextExtractionFromImage() throws Exception {
+    void testOcrTextExtractionFromImage() throws Exception {
         Path testImage = createTestImageWithText();
 
         LOG.info("Created test image at: {}", testImage);
@@ -88,6 +88,12 @@ public void testOcrTextExtractionFromImage() throws Exception {
 
         LOG.info("OCR extraction result:\n{}", result);
 
+        checkExtractedText(result);
+
+        LOG.info("Successfully extracted text from image using OCR");
+    }
+
+    private void checkExtractedText(String result) {
         // Verify that at least some of the expected text was extracted
         // Note: OCR may not be 100% accurate, so we check for partial matches
         String resultLower = result.toLowerCase();
@@ -97,8 +103,6 @@ public void testOcrTextExtractionFromImage() throws Exception {
 
         assertTrue(foundHello || foundApache || foundOcr,
                 "OCR should extract at least some of the expected text. Got: " + result);
-
-        LOG.info("Successfully extracted text from image using OCR");
     }
 
     @Test
@@ -110,6 +114,8 @@ public void testOcrMarkdownConversionFromImage() throws Exception {
         assertNotNull(result, "Markdown result should not be null");
         assertTrue(result.length() > 0, "Markdown result should not be empty");
 
+        checkExtractedText(result);
+
         LOG.info("OCR Markdown conversion result:\n{}", result);
         LOG.info("Successfully converted image to Markdown using OCR");
     }
@@ -124,6 +130,8 @@ public void testOcrJsonConversionFromImage() throws Exception {
         assertTrue(result.length() > 0, "JSON result should not be empty");
         assertTrue(result.contains("{") || result.contains("["), "Result should be valid JSON");
 
+        checkExtractedText(result);
+
         LOG.info("OCR JSON conversion result:\n{}", result);
         LOG.info("Successfully converted image to JSON using OCR");
     }
@@ -137,6 +145,8 @@ public void testOcrWithAsyncMode() throws Exception {
         assertNotNull(result, "Async OCR result should not be null");
         assertTrue(result.length() > 0, "Async OCR result should not be empty");
 
+        checkExtractedText(result);
+
         LOG.info("Async OCR extraction result:\n{}", result);
         LOG.info("Successfully extracted text from image using async OCR");
     }
@@ -150,6 +160,8 @@ public void testOcrFromPngImage() throws Exception {
         assertNotNull(result, "OCR result from PNG should not be null");
         assertTrue(result.length() > 0, "OCR result from PNG should not be empty");
 
+        checkExtractedText(result);
+
         LOG.info("OCR extraction from PNG result:\n{}", result);
         LOG.info("Successfully extracted text from PNG image using OCR");
     }
@@ -163,6 +175,20 @@ public void testOcrWithMultipleTextBlocks() throws Exception {
         assertNotNull(result, "OCR result should not be null");
         assertTrue(result.length() > 0, "OCR result should not be empty");
 
+        // Verify that at least some of the expected text was extracted
+        // Note: OCR may not be 100% accurate, so we check for partial matches
+        String resultLower = result.toLowerCase();
+        boolean foundFirst = resultLower.contains("first");
+        boolean foundSecond = resultLower.contains("second");
+
+        assertTrue(foundFirst && foundSecond,
+                "OCR should extract at least some of the expected text. Got: " + result);
+
+        // TODO: the footer text is not detected by Camel Docling's OCR
+        //        boolean foundFooter = resultLower.contains("footer");
+        //        assertTrue(foundFooter,
+        //                "OCR should extract at least some of the expected text from the footer. Got: " + result);
+
         LOG.info("OCR extraction with multiple text blocks result:\n{}", result);
         LOG.info("Successfully extracted text from image with multiple text blocks");
     }