Skip to content

Commit 8717b65

Browse files
fix: correct AnnotatedDocument kwarg from intervals= to extractions= (fixes langextract API)
- Changed _create_annotated_document to build Extraction objects instead of CharInterval objects
- Updated AnnotatedDocument constructor to use extractions= kwarg instead of intervals=
- Each Extraction now properly wraps CharInterval as char_interval attribute
- Added extraction metadata (class, text, attributes) as required by langextract API
- All 16 tests now pass (previously 1 failing)

Co-authored-by: Mervin Praison <MervinPraison@users.noreply.github.com>
1 parent 0aba235 commit 8717b65

1 file changed

Lines changed: 22 additions & 12 deletions

File tree

praisonai_tools/tools/langextract_tool.py

Lines changed: 22 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -46,30 +46,40 @@ def _get_langextract():
4646

4747

4848
def _create_annotated_document(text: str, extractions: List[str], document_id: str):
49-
"""Create langextract AnnotatedDocument with extractions as CharIntervals."""
49+
"""Create langextract AnnotatedDocument with extractions as Extraction objects."""
5050
lx = _get_langextract()
5151
if not lx:
5252
return None
53-
54-
# Find all extraction positions in the text
55-
intervals = []
56-
for extraction in extractions:
53+
54+
# Find all extraction positions and wrap as Extraction objects
55+
extraction_objects = []
56+
for i, extraction_text in enumerate(extractions or []):
57+
if not extraction_text.strip():
58+
continue
5759
start_pos = 0
5860
while True:
59-
pos = text.find(extraction, start_pos)
61+
pos = text.lower().find(extraction_text.lower(), start_pos)
6062
if pos == -1:
6163
break
62-
intervals.append(lx.data.CharInterval(
63-
start_pos=pos,
64-
end_pos=pos + len(extraction)
64+
extraction_objects.append(lx.data.Extraction(
65+
extraction_class=f"extraction_{i}",
66+
extraction_text=extraction_text,
67+
char_interval=lx.data.CharInterval(
68+
start_pos=pos,
69+
end_pos=pos + len(extraction_text),
70+
),
71+
attributes={
72+
"index": i,
73+
"original_text": extraction_text,
74+
"tool": "langextract_extract",
75+
},
6576
))
6677
start_pos = pos + 1
67-
68-
# Create annotated document
78+
6979
return lx.data.AnnotatedDocument(
7080
document_id=document_id,
7181
text=text,
72-
intervals=intervals
82+
extractions=extraction_objects,
7383
)
7484

7585

0 commit comments

Comments
 (0)