|
10 | 10 | create_knowledge_extractor, |
11 | 11 | extract_knowledge_from_text, |
12 | 12 | extract_knowledge_from_text_batch, |
| 13 | + merge_concrete_entities, |
13 | 14 | merge_topics, |
14 | 15 | ) |
| 16 | +from typeagent.knowpro.kplib import ConcreteEntity, Facet |
15 | 17 |
|
16 | 18 |
|
17 | 19 | class MockKnowledgeExtractor: |
@@ -81,3 +83,151 @@ def test_merge_topics(): |
81 | 83 | assert "topic1" in merged_topics |
82 | 84 | assert "topic2" in merged_topics |
83 | 85 | assert "topic3" in merged_topics |
| 86 | + |
| 87 | + |
| 88 | +# Tests for merge_concrete_entities |
| 89 | + |
| 90 | + |
def test_merge_concrete_entities_empty_list() -> None:
    """Merging no entities at all must yield an empty list."""
    merged = merge_concrete_entities([])
    assert merged == []
| 95 | + |
| 96 | + |
def test_merge_concrete_entities_single_entity() -> None:
    """A lone entity comes back alone, with its name casing and type intact."""
    alice = ConcreteEntity(name="Alice", type=["Person"])
    merged = merge_concrete_entities([alice])

    assert len(merged) == 1
    only = merged[0]
    assert only.name == "Alice"
    assert only.type == ["Person"]
| 105 | + |
| 106 | + |
def test_merge_concrete_entities_distinct() -> None:
    """Entities with different names are not collapsed into one."""
    people = [
        ConcreteEntity(name=person_name, type=["Person"])
        for person_name in ("Alice", "Bob")
    ]
    merged = merge_concrete_entities(people)

    assert len(merged) == 2
    merged_names = {entity.name for entity in merged}
    assert merged_names == {"Alice", "Bob"}
| 118 | + |
| 119 | + |
def test_merge_concrete_entities_same_name_different_case() -> None:
    """Names differing only by letter case stay separate (matching is case-sensitive)."""
    name_and_type = (
        ("Alice", "Person"),
        ("ALICE", "Employee"),
        ("alice", "Manager"),
    )
    variants = [
        ConcreteEntity(name=spelling, type=[kind]) for spelling, kind in name_and_type
    ]
    merged = merge_concrete_entities(variants)

    # Every casing is its own entity, so nothing is merged.
    assert len(merged) == 3
    merged_names = {entity.name for entity in merged}
    assert merged_names == {"Alice", "ALICE", "alice"}
| 133 | + |
| 134 | + |
def test_merge_concrete_entities_types_deduplicated_and_sorted() -> None:
    """The merged entity's type list has no repeats and is in sorted order."""
    first = ConcreteEntity(name="Alice", type=["Person", "Employee"])
    second = ConcreteEntity(name="Alice", type=["Employee", "Manager"])
    merged = merge_concrete_entities([first, second])

    assert len(merged) == 1
    # "Employee" appears in both inputs but only once in the output.
    assert merged[0].type == ["Employee", "Manager", "Person"]
| 145 | + |
| 146 | + |
def test_merge_concrete_entities_with_facets() -> None:
    """Differently named facets from same-named entities all end up on the merged entity."""
    as_person = ConcreteEntity(
        name="Alice",
        type=["Person"],
        facets=[Facet(name="age", value="30")],
    )
    as_employee = ConcreteEntity(
        name="Alice",
        type=["Employee"],
        facets=[Facet(name="department", value="Engineering")],
    )
    merged = merge_concrete_entities([as_person, as_employee])

    assert len(merged) == 1
    assert merged[0].facets is not None
    merged_facet_names = {facet.name for facet in merged[0].facets}
    assert merged_facet_names == {"age", "department"}
| 167 | + |
| 168 | + |
def test_merge_concrete_entities_same_facet_combines_values() -> None:
    """Two values for one facet name are joined into a single "; "-separated value."""
    reader = ConcreteEntity(
        name="Alice",
        type=["Person"],
        facets=[Facet(name="hobby", value="reading")],
    )
    swimmer = ConcreteEntity(
        name="Alice",
        type=["Person"],
        facets=[Facet(name="hobby", value="swimming")],
    )
    merged = merge_concrete_entities([reader, swimmer])

    assert len(merged) == 1
    assert merged[0].facets is not None
    hobby = next(facet for facet in merged[0].facets if facet.name == "hobby")
    assert hobby.value == "reading; swimming"
| 189 | + |
| 190 | + |
def test_merge_concrete_entities_facets_deduplicated() -> None:
    """A facet value that repeats across inputs appears only once in the merged value."""
    # "reading" is supplied twice on purpose; it must not be repeated in the output.
    hobby_values = ("reading", "reading", "swimming")
    alices = [
        ConcreteEntity(
            name="Alice",
            type=["Person"],
            facets=[Facet(name="hobby", value=hobby_value)],
        )
        for hobby_value in hobby_values
    ]
    merged = merge_concrete_entities(alices)

    assert len(merged) == 1
    assert merged[0].facets is not None
    hobby = next(facet for facet in merged[0].facets if facet.name == "hobby")
    assert hobby.value == "reading; swimming"
| 216 | + |
| 217 | + |
def test_merge_concrete_entities_without_facets_with_facets() -> None:
    """An entity with no facets merged with one that has a facet keeps that facet."""
    bare = ConcreteEntity(name="Alice", type=["Person"])
    with_department = ConcreteEntity(
        name="Alice",
        type=["Employee"],
        facets=[Facet(name="department", value="Engineering")],
    )
    merged = merge_concrete_entities([bare, with_department])

    assert len(merged) == 1
    assert merged[0].facets is not None
    # Only the second input carried a facet, so exactly one is expected.
    assert len(merged[0].facets) == 1
    assert merged[0].facets[0].name == "department"
0 commit comments