Skip to content

Commit 869a05e

Browse files
committed
ai: various fixes: imports, deps, retry, output
1 parent 0d52470 commit 869a05e

File tree

4 files changed

+26
-13
lines changed

4 files changed

+26
-13
lines changed

intelmq/bots/parsers/unstructured_text/.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
# SPDX-FileCopyrightText: 2025 Aaron Kaplan
2+
#
3+
# SPDX-License-Identifier: AGPL-3.0-or-later
14
.env
25
test_data/*
3-
__pycache__
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,5 @@
1+
# SPDX-FileCopyrightText: 2025 Aaron Kaplan, Institute for Common Good Technology
2+
# SPDX-License-Identifier: AGPL-3.0-or-later
13
pydantic-ai
24
pydantic
5+
langfuse

intelmq/bots/parsers/unstructured_text/main.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
#!/usr/bin/env python3
2+
# SPDX-FileCopyrightText: 2025 Aaron Kaplan
3+
#
4+
# SPDX-License-Identifier: AGPL-3.0-or-later
25
"""IntelMQ CTI Extractor - Extract security events from threat intelligence reports"""
36

47
import os

intelmq/bots/parsers/unstructured_text/parser.py

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# SPDX-FileCopyrightText: 2025 Aaron Kaplan, Institute for Common Good Technology
12
#
23
# SPDX-License-Identifier: AGPL-3.0-or-later
34

@@ -12,15 +13,13 @@
1213

1314
from intelmq.lib.bot import ParserBot, utils
1415
from intelmq.lib.exceptions import InvalidArgument
15-
from intelmq.lib.harmonization import ClassificationType
1616
from intelmq.lib.exceptions import MissingDependencyError
1717

18-
from pydantic import BaseModel, AfterValidator
19-
from typing import Annotated, List
18+
from pydantic import BaseModel, ValidationError
19+
from typing import List
2020
from pydantic_ai import Agent
2121

2222
from intelmq.lib.basemodel import IntelMQEventModel
23-
from requests import api
2423

2524
from pprint import pprint
2625

@@ -36,7 +35,7 @@
3635
)
3736

3837

39-
def extract_data(text: str, model: str, api_key: str) -> List[IntelMQEventModel]:
38+
def extract_data(text: str, model: str, api_key: str, maximum_attempts: int = 5) -> List[IntelMQEventModel]:
4039
"""Use an LLM (part of the config which one) to extract IDF-style events from the raw text.
4140
We use ai.pydantic.dev for telling the LLM to extract and map all information from the (unstructured) `text` to the IntelMQ Data Format
4241
(see https://docs.intelmq.org/latest/user/event/) for a description of the IntelMQ Data Format (IDF)
@@ -48,10 +47,15 @@ def extract_data(text: str, model: str, api_key: str) -> List[IntelMQEventModel]
4847
Agent.instrument_all()
4948
agent = Agent(model, output_type=IntelMQEventModel)
5049

51-
result = agent.run_sync(text)
52-
print(result.output)
53-
print(result.usage())
54-
pprint(result)
50+
for attempt in range(maximum_attempts):
51+
try:
52+
result = agent.run_sync(text)
53+
except ValidationError as exc:
54+
print(f'Got invalid result: ({exc!r}. Trying again ({attempt}/{maximum_attempts}).')
55+
pass
56+
else:
57+
break
58+
print(f'Usage: {result.usage()}')
5559
return [result]
5660

5761

@@ -85,8 +89,9 @@ def process(self):
8589
with open("intelmq/bots/parsers/unstructured_text/test_data/sample.txt", "r") as f:
8690
SAMPLE_CONTENT = f.read()
8791

88-
result = extract_data(
92+
results = extract_data(
8993
SAMPLE_CONTENT, model="openai:gpt-4o", api_key=os.getenv("OPENAI_API_KEY")
9094
)
91-
for r in result:
92-
print(r)
95+
for i, result in enumerate(results):
96+
print(f'Result {i}:')
97+
pprint(result.response.parts[0].args_as_dict())

0 commit comments

Comments
 (0)