Skip to content

Commit fccf67c

Browse files
authored
entities field support for neo4j (#420)
* implement entities for neo4j connector
1 parent b3789b5 commit fccf67c

File tree

3 files changed

+48
-20
lines changed

3 files changed

+48
-20
lines changed

CHANGELOG.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
1-
## 0.5.12-dev1
1+
2+
## 0.5.12
3+
4+
### Features
5+
6+
* **Support for entities in neo4j connector**
27

38
### Fixes
49

unstructured_ingest/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.5.12-dev1" # pragma: no cover
1+
__version__ = "0.5.12" # pragma: no cover

unstructured_ingest/v2/processes/connectors/neo4j.py

Lines changed: 41 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,28 @@ def run( # type: ignore
111111

112112
return output_filepath
113113

114+
def _add_entities(self, element: dict, graph: "Graph", element_node: _Node) -> None:
    """Attach the entities listed in an element's metadata to the graph.

    Entities are read from ``element["metadata"]["entities"]``. Every
    well-formed entry (a dict carrying both an ``"entity"`` and a ``"type"``
    key) contributes two edges: one between the entity node and its type node
    tagged ``Relationship.ENTITY_TYPE``, and one between ``element_node`` and
    the entity node tagged ``Relationship.HAS_ENTITY``. Missing, empty or
    malformed data is skipped silently.

    Args:
        element: Element dict whose ``metadata`` mapping may hold ``entities``.
        graph: Graph that receives the new edges; it is mutated in place.
        element_node: Node of the element the entities belong to.
    """
    # ``dict.get(key, {})`` only falls back when the key is absent, so an
    # explicit ``"metadata": None`` would make a chained ``.get`` raise
    # AttributeError. Normalise first, then make sure it really is a mapping.
    metadata = element.get("metadata") or {}
    if not isinstance(metadata, dict):
        return

    entities = metadata.get("entities")
    if not entities or not isinstance(entities, list):
        return

    for entity in entities:
        if not isinstance(entity, dict):
            continue
        if "entity" not in entity or "type" not in entity:
            continue

        entity_name = entity["entity"]
        entity_type = entity["type"]

        entity_node = _Node(
            labels=[Label.ENTITY], properties={"id": entity_name}, id_=entity_name
        )
        # NOTE(review): the type node carries the same Entity label and is
        # keyed by the raw type string; if node identity is derived from
        # ``id_``, an entity whose name equals a type name would map to the
        # same node - confirm this is intended.
        type_node = _Node(
            labels=[Label.ENTITY], properties={"id": entity_type}, id_=entity_type
        )

        graph.add_edge(entity_node, type_node, relationship=Relationship.ENTITY_TYPE)
        graph.add_edge(element_node, entity_node, relationship=Relationship.HAS_ENTITY)
135+
114136
def _create_lexical_graph(self, elements: list[dict], document_node: _Node) -> "Graph":
115137
import networkx as nx
116138

@@ -129,25 +151,23 @@ def _create_lexical_graph(self, elements: list[dict], document_node: _Node) -> "
129151
previous_node = element_node
130152
graph.add_edge(element_node, document_node, relationship=Relationship.PART_OF_DOCUMENT)
131153

154+
self._add_entities(element, graph, element_node)
155+
132156
if self._is_chunk(element):
133-
origin_element_nodes = [
134-
self._create_element_node(origin_element)
135-
for origin_element in format_and_truncate_orig_elements(element)
136-
]
137-
graph.add_edges_from(
138-
[
139-
(origin_element_node, element_node)
140-
for origin_element_node in origin_element_nodes
141-
],
142-
relationship=Relationship.PART_OF_CHUNK,
143-
)
144-
graph.add_edges_from(
145-
[
146-
(origin_element_node, document_node)
147-
for origin_element_node in origin_element_nodes
148-
],
149-
relationship=Relationship.PART_OF_DOCUMENT,
150-
)
157+
for origin_element in format_and_truncate_orig_elements(element):
158+
origin_element_node = self._create_element_node(origin_element)
159+
160+
graph.add_edge(
161+
origin_element_node,
162+
element_node,
163+
relationship=Relationship.PART_OF_CHUNK,
164+
)
165+
graph.add_edge(
166+
origin_element_node,
167+
document_node,
168+
relationship=Relationship.PART_OF_DOCUMENT,
169+
)
170+
self._add_entities(origin_element, graph, origin_element_node)
151171

152172
return graph
153173

@@ -231,13 +251,16 @@ class Label(Enum):
231251
UNSTRUCTURED_ELEMENT = "UnstructuredElement"
232252
CHUNK = "Chunk"
233253
DOCUMENT = "Document"
254+
ENTITY = "Entity"
234255

235256

236257
class Relationship(Enum):
    """Relationship types attached to the edges of the generated graph.

    Each member's value is the literal string of its own name.
    """

    # Membership: an element belongs to a document / an original element
    # belongs to the chunk built from it.
    PART_OF_DOCUMENT = "PART_OF_DOCUMENT"
    PART_OF_CHUNK = "PART_OF_CHUNK"

    # Presumably sequence links between consecutive chunks / elements; their
    # use is not visible in this part of the file.
    NEXT_CHUNK = "NEXT_CHUNK"
    NEXT_ELEMENT = "NEXT_ELEMENT"

    # Entity support: an entity node is linked to its type node, and an
    # element node is linked to the entities found in it.
    ENTITY_TYPE = "ENTITY_TYPE"
    HAS_ENTITY = "HAS_ENTITY"
241264

242265

243266
class Neo4jUploaderConfig(UploaderConfig):

0 commit comments

Comments
 (0)