Skip to content

Commit ba3a5e7

Browse files
authored
Merge pull request #132 from IBM/semantic_operators
Semantic operators
2 parents 4889ccb + cb74103 commit ba3a5e7

File tree

65 files changed

+16652
-14
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

65 files changed

+16652
-14
lines changed

.github/workflows/releases.yaml

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ on:
1818
release-method:
1919
description: "How the release was triggered (commit-message or tag-based)"
2020
value: ${{ jobs.check-release.outputs.release-method }}
21+
prerelease:
22+
description: "Whether this is a prerelease"
23+
value: ${{ jobs.check-release.outputs.prerelease }}
2124

2225
jobs:
2326
check-release:
@@ -28,6 +31,7 @@ jobs:
2831
current-version: ${{ steps.version.outputs.current-version }}
2932
next-version: ${{ steps.version.outputs.next-version }}
3033
release-method: ${{ steps.check-commit.outputs.release-method || steps.check-tag.outputs.release-method }}
34+
prerelease: "false"
3135

3236
steps:
3337
- uses: actions/checkout@v4
@@ -148,18 +152,16 @@ jobs:
148152
git push origin "v${NEXT_VERSION}"
149153
150154
- name: Create GitHub Release
151-
uses: actions/create-release@v1
152155
env:
153156
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
154-
with:
155-
tag_name: v${{ needs.check-release.outputs.next-version }}
156-
release_name: Release v${{ needs.check-release.outputs.next-version }}
157-
body: |
158-
Automated release: ${{ needs.check-release.outputs.bump-type }} bump
157+
run: |
158+
gh release create v${{ needs.check-release.outputs.next-version }} \
159+
--title "Release v${{ needs.check-release.outputs.next-version }}" \
160+
--body "Automated release: ${{ needs.check-release.outputs.bump-type }} bump
159161
160-
Previous version: ${{ needs.check-release.outputs.current-version }}
161-
New version: ${{ needs.check-release.outputs.next-version }}
162+
Previous version: ${{ needs.check-release.outputs.current-version }}
163+
New version: ${{ needs.check-release.outputs.next-version }}
162164

163-
Triggered via: ${{ needs.check-release.outputs.release-method }}
164-
draft: false
165-
prerelease: false
165+
Triggered via: ${{ needs.check-release.outputs.release-method }}" \
166+
--draft=false \
167+
--prerelease=false

README.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,12 @@
1111
Build AI-powered pipelines as <b>typed data transformations</b>—combining Pydantic schemas, LLM-powered transduction, and async execution.
1212
</p>
1313

14+
---
15+
16+
## ✨ Why Agentics
17+
18+
Most "agent frameworks" let untyped text flow through a pipeline. Agentics flips that: **types are the interface**.
19+
Workflows are expressed as transformations between structured states, with predictable schemas and composable operators.
1420

1521
---
1622

@@ -120,7 +126,7 @@ Apache 2.0
120126

121127
## 👥 Authors
122128

123-
**Project Lead**
129+
**Principal Investigator**
124130
- Alfio Massimiliano Gliozzo (IBM Research) — gliozzo@us.ibm.com
125131

126132
**Core Contributors**

examples/discovery/agentic_db.py

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
from dotenv import load_dotenv
2+
3+
load_dotenv()
4+
5+
import csv
6+
import json
7+
import os
8+
import sqlite3
9+
from pathlib import Path
10+
from typing import IO, Optional, Union
11+
12+
import pandas as pd
13+
from pydantic import BaseModel, ConfigDict, PrivateAttr
14+
15+
16+
def read_table_smart(path: str) -> pd.DataFrame:
    """Load a delimited text file, guessing the separator from its first line.

    Candidate delimiters are comma, tab, and semicolon. Tab wins only with a
    strict majority over both rivals; otherwise semicolon wins when strictly
    ahead of comma; comma is the fallback (including on a complete tie).
    """
    with open(path, "r", encoding="utf-8") as handle:
        first_line = handle.readline()

    counts = {
        ",": first_line.count(","),
        "\t": first_line.count("\t"),
        ";": first_line.count(";"),
    }

    # Same tie-breaking order as a chained if/elif on (tab, semicolon, comma).
    if counts["\t"] > counts[","] and counts["\t"] > counts[";"]:
        delimiter = "\t"
    elif counts[";"] > counts[","]:
        delimiter = ";"
    else:
        delimiter = ","

    return pd.read_csv(path, sep=delimiter, engine="python")
36+
37+
38+
class AgenticDB(BaseModel):
    """Wrapper around one tabular dataset loaded from a CSV file.

    The data is stored as a plain dict (``df``, produced by
    ``DataFrame.to_dict()`` or a DataFrame from ``import_db_from_csv``) and,
    lazily, as an in-memory SQLite table so it can be queried with SQL via
    :meth:`query_db`.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    name: Optional[str] = None  # file name of the backing CSV
    dataset_description: Optional[str] = None
    df: Optional[dict] = None  # tabular data; dict form for pydantic serialization
    path: Optional[str] = None  # filesystem path the data was loaded from
    columns: Optional[list] = None  # raw column metadata from DiscoveryBench
    metadata: Optional[dict] = None

    # Lazily-created SQLite connection; private so it stays out of the schema.
    _conn: Optional[sqlite3.Connection] = PrivateAttr(default=None)

    @classmethod
    def import_from_discovery_bench_metadata(cls, metadata_path: str) -> list:
        """Build one AgenticDB per CSV dataset listed in a DiscoveryBench
        metadata JSON file; the CSVs are resolved next to the metadata file.

        Non-CSV dataset entries (e.g. ``.txt``) are skipped. Iterating the
        entries directly keeps each CSV paired with its own description and
        columns (zipping a filtered CSV list against unfiltered description/
        column lists misaligns them as soon as a non-CSV entry exists).
        """
        with open(metadata_path, "r") as f:
            metadata = json.load(f)

        output_dbs = []
        for entry in metadata["datasets"]:
            csv_name = entry["name"]
            if not csv_name.endswith("csv"):
                continue  # only CSV datasets are imported
            data_path = Path(os.path.dirname(metadata_path)) / csv_name
            output_dbs.append(
                cls(
                    dataset_description=entry["description"],
                    df=read_table_smart(data_path).to_dict(),
                    path=str(data_path),
                    name=csv_name,
                    columns=entry["columns"]["raw"],
                )
            )
        return output_dbs

    def import_db_from_csv(self, source: Optional[Union[str, IO[bytes]]] = None):
        """
        Import a CSV file into an in-memory or file-based SQLite database.
        Supports both:
        - path to a CSV file (string)
        - file-like buffer (from Streamlit uploader or similar)

        Falls back to ``self.path`` when no source is given; raises
        ValueError when neither is available. Returns the loaded DataFrame.
        """
        if isinstance(source, str):
            self.path = source
            self.df = pd.read_csv(source)
        elif source is not None:
            self.path = ":memory:"  # store in memory if no path
            self.df = pd.read_csv(source)
        elif self.path:
            self.df = pd.read_csv(self.path)
        else:
            raise ValueError(
                "Either a CSV file path or a file-like object must be provided."
            )
        return self.df

    def query_db(self, sql: str) -> pd.DataFrame:
        """Execute an SQL query and return the result as a pandas DataFrame.

        The in-memory SQLite connection is created on first use and the
        dataset is written to a table named ``data`` (previously this method
        dereferenced a connection that was never created, so every call
        failed). On error an empty DataFrame is returned, preserving the
        original best-effort contract.
        """
        try:
            if self._conn is None:
                self._conn = sqlite3.connect(":memory:")
                frame = (
                    self.df
                    if isinstance(self.df, pd.DataFrame)
                    else pd.DataFrame(self.df)
                )
                frame.to_sql("data", self._conn, index=False, if_exists="replace")
            return pd.read_sql_query(sql, self._conn)
        except Exception as e:
            print(f"Error executing query: {e}")
            return pd.DataFrame()

    def get_description(self) -> str:
        """Return a human-readable summary of this dataset."""
        return f"""===============================
Dataset Path: {self.path}
Dataset Description: {self.dataset_description}
Columns: {self.columns}
"""

examples/discovery/atypes.py

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
import json
2+
import os
3+
from pathlib import Path
4+
from typing import Optional, Union
5+
6+
from agentic_db import AgenticDB
7+
from pydantic import BaseModel, Field
8+
9+
from agentics import AG
10+
11+
12+
class IntermediateEvidence(BaseModel):
    """One piece of evidence gathered from a source dataset for a question.

    The field descriptions double as LLM instructions, so their wording
    matters; the typo "bsed" in the partial_answer prompt is corrected.
    """

    # True when any relevant evidence for the question was found.
    evidence_found: Optional[bool] = Field(
        None,
        description="Return True if you found any relevant evidence for the QUESTION, False otherwise",
    )
    original_question: Optional[str] = None
    evidence: Optional[str] = Field(
        None,
        description="Identify useful information needed to answer the given QUESTION",
    )
    partial_answer: Optional[str] = Field(
        None,
        description="Provide a partial answer for the given Question based on the SOURCE data",
    )
26+
27+
28+
class Answer(BaseModel):
    """Final answer to a question, assembled from intermediate evidence."""

    short_answer: Optional[str] = Field(
        None,
        description="Provide a one sentence answer which specifically addresses the question",
    )
    full_answer: Optional[str] = Field(
        None,
        description="Provide a detailed answer for the given question taking into consideration the evidence sources provided",
    )
    # Default added: the field previously had no default, making it required
    # (so Answer() failed validation) while every sibling field is optional.
    selected_evidence: Optional[list[IntermediateEvidence]] = None
    confidence: Optional[float] = None
39+
40+
41+
class Question(BaseModel):
    """A DiscoveryBench question plus its data sources and (partial) answers."""

    qid: Optional[Union[int, str]] = None
    true_hypothesis: Optional[str] = None
    generated_hypothesis: Optional[str] = Field(
        None,
        description="A specific hypothesis that supports the question, derived from the analysis of the input dataset",
    )
    domain_knowledge: Optional[str] = None
    question_type: Optional[str] = None
    question: Optional[str] = None
    dataset: Optional[str] = None
    dbs: Optional[list[AgenticDB]] = None
    intermediate_evidence: Optional[list[IntermediateEvidence]] = []
    full_answer: Optional[Answer] = None

    @classmethod
    def import_questions_from_metadata_as_ag(
        cls, metadata_path: str, import_dbs: bool = False
    ) -> AG:
        """Load the questions of one DiscoveryBench metadata file into an AG.

        When ``import_dbs`` is True every question also carries the shared
        list of AgenticDB sources. The (expensive) DB import — which reads
        every CSV — now only happens when it is actually requested; the
        original loaded them unconditionally and discarded the result.
        """
        with open(metadata_path, "r") as f:
            metadata = json.load(f)
        dbs = (
            AgenticDB.import_from_discovery_bench_metadata(metadata_path)
            if import_dbs
            else None
        )
        output = AG(atype=Question)
        for question in metadata["queries"][0]:
            question_obj = cls(**question)
            question_obj.domain_knowledge = metadata.get("domain_knowledge")
            if import_dbs:
                question_obj.dbs = dbs
            output.append(question_obj)
        return output
70+
71+
72+
class Dataset(BaseModel):
    """A collection of AgenticDB sources and their DiscoveryBench questions."""

    metadata_path: Optional[str] = None
    datasets_descriptions: Optional[list[str]] = None
    dbs: Optional[list[AgenticDB]] = None
    questions: Optional[list[Question]] = []

    @classmethod
    def import_from_discovery_bench_metadata(
        cls,
        dataset,
        metadata_path="/Users/gliozzo/Code/agentics911/agentics/sandbox/discoverybench/discoverybench/real/demo",
    ) -> "Dataset":
        """Import every ``*.json`` metadata file under metadata_path/dataset.

        Returns the populated Dataset (the previous ``-> str`` annotation was
        wrong). NOTE(review): the default metadata_path is a developer-machine
        absolute path; callers should pass their own location.
        """
        dataset_obj = cls()

        if not dataset_obj.questions:
            dataset_obj.questions = []
        base_path = Path(metadata_path) / dataset
        print("Importing dataset", end="")
        for metadata in os.listdir(base_path):
            if metadata.endswith(".json"):
                # DBs are imported only from the first metadata file seen.
                if not dataset_obj.dbs:
                    dataset_obj.dbs = AgenticDB.import_from_discovery_bench_metadata(
                        base_path / metadata
                    )
                # Progress marker: one dot per metadata file (end="." printed
                # two dots per file in the original).
                print(".", end="")
                dataset_obj.questions += Question.import_questions_from_metadata_as_ag(
                    base_path / metadata, import_dbs=False
                ).states
        dataset_obj.get_source_descriptions()
        return dataset_obj

    def get_source_descriptions(self) -> list:
        """Rebuild and return a textual description for every imported DB.

        Previously annotated ``-> str`` although it returns a list of strings.
        """
        self.datasets_descriptions = []
        for db in self.dbs:
            self.datasets_descriptions.append(
                f"""
Dataset Description: {db.dataset_description}
Columns: {db.columns}
"""
            )
        return self.datasets_descriptions

    def get_questions_as_ag(self) -> AG:
        """Expose the accumulated questions as an AG of Question states."""
        return AG(atype=Question, states=self.questions)

0 commit comments

Comments
 (0)