Skip to content

Commit 2197f9c

Browse files
committed
Implement merge_concrete_entities function and add some tests
1 parent 9644319 commit 2197f9c

File tree

2 files changed

+224
-7
lines changed

2 files changed

+224
-7
lines changed

src/typeagent/knowpro/knowledge.py

Lines changed: 74 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# Licensed under the MIT License.
33

44
import asyncio
5+
from dataclasses import dataclass
56

67
from typechat import Result, TypeChatLanguageModel
78

@@ -74,14 +75,80 @@ async def extract_knowledge_from_text_batch(
7475
def merge_concrete_entities(
7576
entities: list[kplib.ConcreteEntity],
7677
) -> list[kplib.ConcreteEntity]:
77-
"""Merge a list of concrete entities into a single list of merged entities."""
78-
raise NotImplementedError("TODO")
79-
# merged_entities = concrete_to_merged_entities(entities)
78+
"""Merge a list of concrete entities by name, combining types and facets.
79+
80+
Entities with the same name are merged:
81+
- Types are combined into a sorted unique list
82+
- Facets with the same name have their values concatenated with "; "
83+
"""
84+
if not entities:
85+
return []
86+
87+
# Build a dict of merged entities keyed by name
88+
merged: dict[str, _MergedEntity] = {}
89+
90+
for entity in entities:
91+
name_key = entity.name
92+
existing = merged.get(name_key)
93+
94+
if existing is None:
95+
# First occurrence - create new merged entity
96+
merged[name_key] = _MergedEntity(
97+
name=entity.name,
98+
types=set(entity.type),
99+
facets=_facets_to_merged(entity.facets) if entity.facets else {},
100+
)
101+
else:
102+
# Merge into existing
103+
existing.types.update(entity.type)
104+
if entity.facets:
105+
for facet in entity.facets:
106+
facet_name = facet.name
107+
facet_value = str(facet.value) if facet.value else ""
108+
existing.facets.setdefault(facet_name, []).append(facet_value)
109+
110+
# Convert merged entities back to ConcreteEntity
111+
result = []
112+
for merged_entity in merged.values():
113+
concrete = kplib.ConcreteEntity(
114+
name=merged_entity.name,
115+
type=sorted(merged_entity.types),
116+
)
117+
if merged_entity.facets:
118+
concrete.facets = _merged_to_facets(merged_entity.facets)
119+
result.append(concrete)
120+
121+
return result
122+
123+
124+
@dataclass
125+
class _MergedEntity:
126+
"""Internal helper for merging entities."""
127+
128+
name: str
129+
types: set[str]
130+
facets: dict[str, list[str]]
131+
132+
133+
def _facets_to_merged(facets: list[kplib.Facet]) -> dict[str, list[str]]:
134+
"""Convert a list of Facets to a merged facets dict."""
135+
merged: dict[str, list[str]] = {}
136+
for facet in facets:
137+
name = facet.name
138+
value = str(facet.value) if facet.value else ""
139+
merged.setdefault(name, []).append(value)
140+
return merged
141+
80142

81-
# merged_concrete_entities = []
82-
# for merged_entity in merged_entities.values():
83-
# merged_concrete_entities.append(merged_to_concrete_entity(merged_entity))
84-
# return merged_concrete_entities
143+
def _merged_to_facets(merged_facets: dict[str, list[str]]) -> list[kplib.Facet]:
144+
"""Convert a merged facets dict back to a list of Facets."""
145+
facets = []
146+
for name, values in merged_facets.items():
147+
if values:
148+
# Deduplicate values while preserving order
149+
unique_values = list(dict.fromkeys(values))
150+
facets.append(kplib.Facet(name=name, value="; ".join(unique_values)))
151+
return facets
85152

86153

87154
def merge_topics(topics: list[str]) -> list[str]:

tests/test_knowledge.py

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,10 @@
1010
create_knowledge_extractor,
1111
extract_knowledge_from_text,
1212
extract_knowledge_from_text_batch,
13+
merge_concrete_entities,
1314
merge_topics,
1415
)
16+
from typeagent.knowpro.kplib import ConcreteEntity, Facet
1517

1618

1719
class MockKnowledgeExtractor:
@@ -81,3 +83,151 @@ def test_merge_topics():
8183
assert "topic1" in merged_topics
8284
assert "topic2" in merged_topics
8385
assert "topic3" in merged_topics
86+
87+
88+
# Tests for merge_concrete_entities
89+
90+
91+
def test_merge_concrete_entities_empty_list() -> None:
92+
"""Test merging an empty list returns empty list."""
93+
result = merge_concrete_entities([])
94+
assert result == []
95+
96+
97+
def test_merge_concrete_entities_single_entity() -> None:
98+
"""Test merging a single entity preserves case."""
99+
entity = ConcreteEntity(name="Alice", type=["Person"])
100+
result = merge_concrete_entities([entity])
101+
102+
assert len(result) == 1
103+
assert result[0].name == "Alice"
104+
assert result[0].type == ["Person"]
105+
106+
107+
def test_merge_concrete_entities_distinct() -> None:
108+
"""Test merging distinct entities keeps them separate."""
109+
entities = [
110+
ConcreteEntity(name="Alice", type=["Person"]),
111+
ConcreteEntity(name="Bob", type=["Person"]),
112+
]
113+
result = merge_concrete_entities(entities)
114+
115+
assert len(result) == 2
116+
names = {e.name for e in result}
117+
assert names == {"Alice", "Bob"}
118+
119+
120+
def test_merge_concrete_entities_same_name_different_case() -> None:
121+
"""Test that entities with different case names are NOT merged (case-sensitive)."""
122+
entities = [
123+
ConcreteEntity(name="Alice", type=["Person"]),
124+
ConcreteEntity(name="ALICE", type=["Employee"]),
125+
ConcreteEntity(name="alice", type=["Manager"]),
126+
]
127+
result = merge_concrete_entities(entities)
128+
129+
# Case-sensitive: all three are distinct
130+
assert len(result) == 3
131+
names = {e.name for e in result}
132+
assert names == {"Alice", "ALICE", "alice"}
133+
134+
135+
def test_merge_concrete_entities_types_deduplicated_and_sorted() -> None:
136+
"""Test that merged types are deduplicated and sorted."""
137+
entities = [
138+
ConcreteEntity(name="Alice", type=["Person", "Employee"]),
139+
ConcreteEntity(name="Alice", type=["Employee", "Manager"]),
140+
]
141+
result = merge_concrete_entities(entities)
142+
143+
assert len(result) == 1
144+
assert result[0].type == ["Employee", "Manager", "Person"]
145+
146+
147+
def test_merge_concrete_entities_with_facets() -> None:
148+
"""Test merging entities with facets."""
149+
entities = [
150+
ConcreteEntity(
151+
name="Alice",
152+
type=["Person"],
153+
facets=[Facet(name="age", value="30")],
154+
),
155+
ConcreteEntity(
156+
name="Alice",
157+
type=["Employee"],
158+
facets=[Facet(name="department", value="Engineering")],
159+
),
160+
]
161+
result = merge_concrete_entities(entities)
162+
163+
assert len(result) == 1
164+
assert result[0].facets is not None
165+
facet_names = {f.name for f in result[0].facets}
166+
assert facet_names == {"age", "department"}
167+
168+
169+
def test_merge_concrete_entities_same_facet_combines_values() -> None:
170+
"""Test that facets with the same name have values combined."""
171+
entities = [
172+
ConcreteEntity(
173+
name="Alice",
174+
type=["Person"],
175+
facets=[Facet(name="hobby", value="reading")],
176+
),
177+
ConcreteEntity(
178+
name="Alice",
179+
type=["Person"],
180+
facets=[Facet(name="hobby", value="swimming")],
181+
),
182+
]
183+
result = merge_concrete_entities(entities)
184+
185+
assert len(result) == 1
186+
assert result[0].facets is not None
187+
hobby_facet = next(f for f in result[0].facets if f.name == "hobby")
188+
assert hobby_facet.value == "reading; swimming"
189+
190+
191+
def test_merge_concrete_entities_facets_deduplicated() -> None:
192+
"""Test that duplicate facet values are deduplicated."""
193+
entities = [
194+
ConcreteEntity(
195+
name="Alice",
196+
type=["Person"],
197+
facets=[Facet(name="hobby", value="reading")],
198+
),
199+
ConcreteEntity(
200+
name="Alice",
201+
type=["Person"],
202+
facets=[Facet(name="hobby", value="reading")], # Duplicate
203+
),
204+
ConcreteEntity(
205+
name="Alice",
206+
type=["Person"],
207+
facets=[Facet(name="hobby", value="swimming")],
208+
),
209+
]
210+
result = merge_concrete_entities(entities)
211+
212+
assert len(result) == 1
213+
assert result[0].facets is not None
214+
hobby_facet = next(f for f in result[0].facets if f.name == "hobby")
215+
assert hobby_facet.value == "reading; swimming"
216+
217+
218+
def test_merge_concrete_entities_without_facets_with_facets() -> None:
219+
"""Test merging an entity without facets with one that has facets."""
220+
entities = [
221+
ConcreteEntity(name="Alice", type=["Person"]),
222+
ConcreteEntity(
223+
name="Alice",
224+
type=["Employee"],
225+
facets=[Facet(name="department", value="Engineering")],
226+
),
227+
]
228+
result = merge_concrete_entities(entities)
229+
230+
assert len(result) == 1
231+
assert result[0].facets is not None
232+
assert len(result[0].facets) == 1
233+
assert result[0].facets[0].name == "department"

0 commit comments

Comments
 (0)