fix: correct AnnotatedDocument kwarg from intervals= to extractions= (fixes langextract API usage)

github-actions[bot] · MervinPraison · github-actions[bot] · commit 8717b651ac0f · 2026-04-18T08:04:35.000Z
- Changed _create_annotated_document to build Extraction objects instead of CharInterval objects
- Updated AnnotatedDocument constructor to use extractions= kwarg instead of intervals=
- Each Extraction now properly wraps CharInterval as char_interval attribute
- Added extraction metadata (class, text, attributes) as required by langextract API
- All 16 tests now pass (previously 1 failing)

Co-authored-by: Mervin Praison <MervinPraison@users.noreply.github.com>
diff --git a/praisonai_tools/tools/langextract_tool.py b/praisonai_tools/tools/langextract_tool.py
@@ -46,30 +46,40 @@ def _get_langextract():
 
 
 def _create_annotated_document(text: str, extractions: List[str], document_id: str):
-    """Create langextract AnnotatedDocument with extractions as CharIntervals."""
+    """Create langextract AnnotatedDocument with extractions as Extraction objects."""
     lx = _get_langextract()
     if not lx:
         return None
-    
-    # Find all extraction positions in the text
-    intervals = []
-    for extraction in extractions:
+
+    # Find all extraction positions and wrap as Extraction objects
+    extraction_objects = []
+    for i, extraction_text in enumerate(extractions or []):
+        if not extraction_text.strip():
+            continue
         start_pos = 0
         while True:
-            pos = text.find(extraction, start_pos)
+            pos = text.lower().find(extraction_text.lower(), start_pos)
             if pos == -1:
                 break
-            intervals.append(lx.data.CharInterval(
-                start_pos=pos,
-                end_pos=pos + len(extraction)
+            extraction_objects.append(lx.data.Extraction(
+                extraction_class=f"extraction_{i}",
+                extraction_text=extraction_text,
+                char_interval=lx.data.CharInterval(
+                    start_pos=pos,
+                    end_pos=pos + len(extraction_text),
+                ),
+                attributes={
+                    "index": i,
+                    "original_text": extraction_text,
+                    "tool": "langextract_extract",
+                },
             ))
             start_pos = pos + 1
-    
-    # Create annotated document
+
     return lx.data.AnnotatedDocument(
         document_id=document_id,
         text=text,
-        intervals=intervals
+        extractions=extraction_objects,
     )