Skip to content

Commit 05439d4

Browse files
authored
refactor(neo4j): rename spec type names for graph, for clarity (#354)
1 parent 00f923a commit 05439d4

File tree

7 files changed

+125
-116
lines changed

7 files changed

+125
-116
lines changed

docs/docs/ops/storages.md

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -70,24 +70,24 @@ The `Neo4j` storage exports each row as a relationship to Neo4j Knowledge Graph.
7070
* `password` (type: `str`): Password for the Neo4j database.
7171
* `db` (type: `str`, optional): The name of the Neo4j database to use as the internal storage, e.g. `neo4j`.
7272
* `mapping`: The mapping from collected row to nodes or relationships of the graph. 2 variations are supported:
73-
* `cocoindex.storages.GraphNode`: each collected row is mapped to a node in the graph. It has the following fields:
73+
* `cocoindex.storages.NodeMapping`: Each collected row is mapped to a node in the graph. It has the following fields:
7474
* `label`: The label of the node.
75-
* `cocoindex.storages.GraphRelationship`: each collected row is mapped to a relationship in the graph,
75+
* `cocoindex.storages.RelationshipMapping`: Each collected row is mapped to a relationship in the graph,
7676
With the following fields:
7777

7878
* `rel_type` (type: `str`): The type of the relationship.
79-
* `source`/`target` (type: `cocoindex.storages.GraphRelationshipEnd`): The source/target node of the relationship, with the following fields:
79+
* `source`/`target` (type: `cocoindex.storages.NodeReferenceMapping`): The source/target node of the relationship, with the following fields:
8080
* `label` (type: `str`): The label of the node.
81-
* `fields` (type: `list[cocoindex.storages.GraphFieldMapping]`): Map fields from the collector to nodes in Neo4j, with the following fields:
82-
* `field_name` (type: `str`): The name of the field in the collected row.
83-
* `node_field_name` (type: `str`, optional): The name of the field to use as the node field. If unspecified, will use the same as `field_name`.
81+
* `fields` (type: `Sequence[cocoindex.storages.TargetFieldMapping]`): Map fields from the collector to nodes in Neo4j, with the following fields:
82+
* `source` (type: `str`): The name of the field in the collected row.
83+
* `target` (type: `str`, optional): The name of the field to use as the node field. If unspecified, will use the same as `source`.
8484

8585
:::info
8686

87-
All fields specified in `fields` will be mapped to properties of source/target nodes. All remaining fields will be mapped to relationship properties by default.
87+
All fields specified in `fields.source` will be mapped to properties of source/target nodes. All remaining fields will be mapped to relationship properties by default.
8888

8989
:::
9090

91-
* `nodes` (type: `dict[str, cocoindex.storages.GraphRelationshipNode]`): This configures indexes for different node labels. Key is the node label. The value type `GraphRelationshipNode` has the following fields to configure [storage indexes](../core/flow_def#storage-indexes) for the node.
91+
* `nodes_storage_spec` (type: `dict[str, cocoindex.storages.NodeStorageSpec]`): This configures indexes for different node labels. Key is the node label. The value type `NodeStorageSpec` has the following fields to configure [storage indexes](../core/flow_def#storage-indexes) for the node.
9292
* `primary_key_fields` is required.
9393
* `vector_indexes` is also supported and optional.

docs/package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,5 +46,6 @@
4646
},
4747
"engines": {
4848
"node": ">=18.0"
49-
}
49+
},
50+
"packageManager": "[email protected]+sha512.a6b2f7906b721bba3d67d4aff083df04dad64c399707841b7acf00f6b133b7ac24255f2652fa22ae3534329dc6180534e98d17432037ff6fd140556e2bb3137e"
5051
}

examples/docs_to_kg/main.py

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -96,35 +96,35 @@ def docs_to_kg_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.D
9696
"document_node",
9797
cocoindex.storages.Neo4j(
9898
connection=conn_spec,
99-
mapping=cocoindex.storages.GraphNode(label="Document")),
99+
mapping=cocoindex.storages.NodeMapping(label="Document")),
100100
primary_key_fields=["filename"],
101101
)
102102
entity_relationship.export(
103103
"entity_relationship",
104104
cocoindex.storages.Neo4j(
105105
connection=conn_spec,
106-
mapping=cocoindex.storages.GraphRelationship(
106+
mapping=cocoindex.storages.RelationshipMapping(
107107
rel_type="RELATIONSHIP",
108-
source=cocoindex.storages.GraphRelationshipEnd(
108+
source=cocoindex.storages.NodeReferenceMapping(
109109
label="Entity",
110110
fields=[
111-
cocoindex.storages.GraphFieldMapping(
112-
field_name="subject", node_field_name="value"),
113-
cocoindex.storages.GraphFieldMapping(
114-
field_name="subject_embedding", node_field_name="embedding"),
111+
cocoindex.storages.TargetFieldMapping(
112+
source="subject", target="value"),
113+
cocoindex.storages.TargetFieldMapping(
114+
source="subject_embedding", target="embedding"),
115115
]
116116
),
117-
target=cocoindex.storages.GraphRelationshipEnd(
117+
target=cocoindex.storages.NodeReferenceMapping(
118118
label="Entity",
119119
fields=[
120-
cocoindex.storages.GraphFieldMapping(
121-
field_name="object", node_field_name="value"),
122-
cocoindex.storages.GraphFieldMapping(
123-
field_name="object_embedding", node_field_name="embedding"),
120+
cocoindex.storages.TargetFieldMapping(
121+
source="object", target="value"),
122+
cocoindex.storages.TargetFieldMapping(
123+
source="object_embedding", target="embedding"),
124124
]
125125
),
126-
nodes={
127-
"Entity": cocoindex.storages.GraphRelationshipNode(
126+
nodes_storage_spec={
127+
"Entity": cocoindex.storages.NodeStorageSpec(
128128
primary_key_fields=["value"],
129129
vector_indexes=[
130130
cocoindex.VectorIndexDef(
@@ -142,16 +142,16 @@ def docs_to_kg_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.D
142142
"entity_mention",
143143
cocoindex.storages.Neo4j(
144144
connection=conn_spec,
145-
mapping=cocoindex.storages.GraphRelationship(
145+
mapping=cocoindex.storages.RelationshipMapping(
146146
rel_type="MENTION",
147-
source=cocoindex.storages.GraphRelationshipEnd(
147+
source=cocoindex.storages.NodeReferenceMapping(
148148
label="Document",
149-
fields=[cocoindex.storages.GraphFieldMapping("filename")],
149+
fields=[cocoindex.storages.TargetFieldMapping("filename")],
150150
),
151-
target=cocoindex.storages.GraphRelationshipEnd(
151+
target=cocoindex.storages.NodeReferenceMapping(
152152
label="Entity",
153-
fields=[cocoindex.storages.GraphFieldMapping(
154-
field_name="entity", node_field_name="value")],
153+
fields=[cocoindex.storages.TargetFieldMapping(
154+
source="entity", target="value")],
155155
),
156156
),
157157
),

python/cocoindex/storages.py

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -29,44 +29,44 @@ class Neo4jConnection:
2929
db: str | None = None
3030

3131
@dataclass
32-
class GraphFieldMapping:
33-
"""Mapping for a Neo4j field."""
34-
field_name: str
32+
class TargetFieldMapping:
33+
"""Mapping for a graph element (node or relationship) field."""
34+
source: str
3535
# Field name for the node in the Knowledge Graph.
3636
# If unspecified, it's the same as `field_name`.
37-
node_field_name: str | None = None
37+
target: str | None = None
3838

3939
@dataclass
40-
class GraphRelationshipEnd:
41-
"""Spec for a Neo4j node type."""
40+
class NodeReferenceMapping:
41+
"""Spec for a referenced graph node, usually as part of a relationship."""
4242
label: str
43-
fields: list[GraphFieldMapping]
43+
fields: list[TargetFieldMapping]
4444

4545
@dataclass
46-
class GraphRelationshipNode:
47-
"""Spec for a Neo4j node type."""
46+
class NodeStorageSpec:
47+
"""Storage spec for a graph node."""
4848
primary_key_fields: Sequence[str]
4949
vector_indexes: Sequence[index.VectorIndexDef] = ()
5050

5151
@dataclass
52-
class GraphNode:
53-
"""Spec for a Neo4j node type."""
52+
class NodeMapping:
53+
"""Spec to map a row to a graph node."""
5454
kind = "Node"
5555

5656
label: str
5757

5858
@dataclass
59-
class GraphRelationship:
60-
"""Spec for a Neo4j relationship."""
59+
class RelationshipMapping:
60+
"""Spec to map a row to a graph relationship."""
6161
kind = "Relationship"
6262

6363
rel_type: str
64-
source: GraphRelationshipEnd
65-
target: GraphRelationshipEnd
66-
nodes: dict[str, GraphRelationshipNode] | None = None
64+
source: NodeReferenceMapping
65+
target: NodeReferenceMapping
66+
nodes_storage_spec: dict[str, NodeStorageSpec] | None = None
6767

6868
class Neo4j(op.StorageSpec):
6969
"""Graph storage powered by Neo4j."""
7070

7171
connection: AuthEntryReference
72-
mapping: GraphNode | GraphRelationship
72+
mapping: NodeMapping | RelationshipMapping

src/ops/storages/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
pub mod neo4j;
22
pub mod postgres;
33
pub mod qdrant;
4+
pub mod spec;

src/ops/storages/neo4j.rs

Lines changed: 28 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
use crate::prelude::*;
2+
3+
use super::spec::{
4+
GraphElementMapping, NodeReferenceMapping, RelationshipMapping, TargetFieldMapping,
5+
};
26
use crate::setup::components::{self, State};
37
use crate::setup::{ResourceSetupStatusCheck, SetupChangeType};
48
use crate::{ops::sdk::*, setup::CombinedState};
@@ -18,58 +22,10 @@ pub struct ConnectionSpec {
1822
db: Option<String>,
1923
}
2024

21-
#[derive(Debug, Deserialize)]
22-
pub struct GraphFieldMappingSpec {
23-
field_name: FieldName,
24-
25-
/// Field name for the node in the Knowledge Graph.
26-
/// If unspecified, it's the same as `field_name`.
27-
#[serde(default)]
28-
node_field_name: Option<FieldName>,
29-
}
30-
31-
impl GraphFieldMappingSpec {
32-
fn get_node_field_name(&self) -> &FieldName {
33-
self.node_field_name.as_ref().unwrap_or(&self.field_name)
34-
}
35-
}
36-
37-
#[derive(Debug, Deserialize)]
38-
pub struct GraphRelationshipEndSpec {
39-
label: String,
40-
fields: Vec<GraphFieldMappingSpec>,
41-
}
42-
43-
#[derive(Debug, Deserialize)]
44-
pub struct GraphRelationshipNodeSpec {
45-
#[serde(flatten)]
46-
index_options: spec::IndexOptions,
47-
}
48-
49-
#[derive(Debug, Deserialize)]
50-
pub struct GraphNodeSpec {
51-
label: String,
52-
}
53-
54-
#[derive(Debug, Deserialize)]
55-
pub struct GraphRelationshipSpec {
56-
rel_type: String,
57-
source: GraphRelationshipEndSpec,
58-
target: GraphRelationshipEndSpec,
59-
nodes: Option<BTreeMap<String, GraphRelationshipNodeSpec>>,
60-
}
61-
62-
#[derive(Debug, Deserialize)]
63-
#[serde(tag = "kind")]
64-
pub enum GraphMappingSpec {
65-
Relationship(GraphRelationshipSpec),
66-
Node(GraphNodeSpec),
67-
}
68-
6925
#[derive(Debug, Deserialize)]
7026
pub struct Spec {
71-
connection: AuthEntryReference,
72-
mapping: GraphMappingSpec,
27+
connection: spec::AuthEntryReference,
28+
mapping: GraphElementMapping,
7329
}
7430

7531
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
@@ -101,12 +57,12 @@ impl ElementType {
10157
}
10258
}
10359

104-
fn from_mapping_spec(spec: &GraphMappingSpec) -> Self {
60+
fn from_mapping_spec(spec: &GraphElementMapping) -> Self {
10561
match spec {
106-
GraphMappingSpec::Relationship(spec) => {
62+
GraphElementMapping::Relationship(spec) => {
10763
ElementType::Relationship(spec.rel_type.clone())
10864
}
109-
GraphMappingSpec::Node(spec) => ElementType::Node(spec.label.clone()),
65+
GraphElementMapping::Node(spec) => ElementType::Node(spec.label.clone()),
11066
}
11167
}
11268

@@ -283,7 +239,9 @@ fn mapped_field_values_to_bolt<'a>(
283239

284240
fn basic_value_to_bolt(value: &BasicValue, schema: &BasicValueType) -> Result<BoltType> {
285241
let bolt_value = match value {
286-
BasicValue::Bytes(v) => BoltType::Bytes(neo4rs::BoltBytes::new(v.clone())),
242+
BasicValue::Bytes(v) => {
243+
BoltType::Bytes(neo4rs::BoltBytes::new(bytes::Bytes::from_owner(v.clone())))
244+
}
287245
BasicValue::Str(v) => BoltType::String(neo4rs::BoltString::new(v)),
288246
BasicValue::Bool(v) => BoltType::Boolean(neo4rs::BoltBoolean::new(*v)),
289247
BasicValue::Int64(v) => BoltType::Integer(neo4rs::BoltInteger::new(*v)),
@@ -393,7 +351,7 @@ impl ExportContext {
393351
key_fields.iter().map(|f| &f.name),
394352
);
395353
let result = match spec.mapping {
396-
GraphMappingSpec::Node(node_spec) => {
354+
GraphElementMapping::Node(node_spec) => {
397355
let delete_cypher = formatdoc! {"
398356
OPTIONAL MATCH (old_node:{label} {key_fields_literal})
399357
WITH old_node
@@ -433,7 +391,7 @@ impl ExportContext {
433391
tgt_fields: None,
434392
}
435393
}
436-
GraphMappingSpec::Relationship(rel_spec) => {
394+
GraphElementMapping::Relationship(rel_spec) => {
437395
let delete_cypher = formatdoc! {"
438396
OPTIONAL MATCH (old_src)-[old_rel:{rel_type} {key_fields_literal}]->(old_tgt)
439397
@@ -515,7 +473,7 @@ impl ExportContext {
515473
create_order: 1,
516474
delete_cypher,
517475
insert_cypher,
518-
delete_before_upsert: true,
476+
delete_before_upsert: false, // true
519477
key_field_params,
520478
key_fields,
521479
value_fields,
@@ -687,15 +645,15 @@ impl RelationshipSetupState {
687645
}
688646
let mut dependent_node_labels = vec![];
689647
match &spec.mapping {
690-
GraphMappingSpec::Node(_) => {}
691-
GraphMappingSpec::Relationship(rel_spec) => {
648+
GraphElementMapping::Node(_) => {}
649+
GraphElementMapping::Relationship(rel_spec) => {
692650
let (src_label_info, tgt_label_info) = end_nodes_label_info.ok_or_else(|| {
693651
anyhow!(
694652
"Expect `end_nodes_label_info` existing for relationship `{}`",
695653
rel_spec.rel_type
696654
)
697655
})?;
698-
for (label, node) in rel_spec.nodes.iter().flatten() {
656+
for (label, node) in rel_spec.nodes_storage_spec.iter().flatten() {
699657
if let Some(primary_key_fields) = &node.index_options.primary_key_fields {
700658
sub_components.push(ComponentState {
701659
object_label: ElementType::Node(label.clone()),
@@ -726,7 +684,7 @@ impl RelationshipSetupState {
726684
}
727685
dependent_node_labels.extend(
728686
rel_spec
729-
.nodes
687+
.nodes_storage_spec
730688
.iter()
731689
.flat_map(|nodes| nodes.keys())
732690
.cloned(),
@@ -1079,25 +1037,25 @@ impl Factory {
10791037
struct DependentNodeLabelAnalyzer<'a> {
10801038
label_name: &'a str,
10811039
fields: IndexMap<&'a str, AnalyzedGraphFieldMapping>,
1082-
remaining_fields: HashMap<&'a str, &'a GraphFieldMappingSpec>,
1040+
remaining_fields: HashMap<&'a str, &'a TargetFieldMapping>,
10831041
index_options: Option<&'a IndexOptions>,
10841042
}
10851043

10861044
impl<'a> DependentNodeLabelAnalyzer<'a> {
10871045
fn new(
1088-
rel_spec: &'a GraphRelationshipSpec,
1089-
rel_end_spec: &'a GraphRelationshipEndSpec,
1046+
rel_spec: &'a RelationshipMapping,
1047+
rel_end_spec: &'a NodeReferenceMapping,
10901048
) -> Result<Self> {
10911049
Ok(Self {
10921050
label_name: rel_end_spec.label.as_str(),
10931051
fields: IndexMap::new(),
10941052
remaining_fields: rel_end_spec
10951053
.fields
10961054
.iter()
1097-
.map(|f| (f.field_name.as_str(), f))
1055+
.map(|f| (f.source.as_str(), f))
10981056
.collect(),
10991057
index_options: rel_spec
1100-
.nodes
1058+
.nodes_storage_spec
11011059
.as_ref()
11021060
.and_then(|nodes| nodes.get(&rel_end_spec.label))
11031061
.and_then(|node_spec| Some(&node_spec.index_options)),
@@ -1110,10 +1068,10 @@ impl<'a> DependentNodeLabelAnalyzer<'a> {
11101068
None => return false,
11111069
};
11121070
self.fields.insert(
1113-
field_info.get_node_field_name().as_str(),
1071+
field_info.get_target().as_str(),
11141072
AnalyzedGraphFieldMapping {
11151073
field_idx,
1116-
field_name: field_info.get_node_field_name().clone(),
1074+
field_name: field_info.get_target().clone(),
11171075
value_type: field_schema.value_type.typ.clone(),
11181076
},
11191077
);
@@ -1184,7 +1142,7 @@ impl StorageFactoryBase for Factory {
11841142
let setup_key = GraphElement::from_spec(&spec);
11851143

11861144
let (value_fields_info, rel_end_label_info) = match &spec.mapping {
1187-
GraphMappingSpec::Node(_) => (
1145+
GraphElementMapping::Node(_) => (
11881146
value_fields_schema
11891147
.into_iter()
11901148
.enumerate()
@@ -1196,7 +1154,7 @@ impl StorageFactoryBase for Factory {
11961154
.collect(),
11971155
None,
11981156
),
1199-
GraphMappingSpec::Relationship(rel_spec) => {
1157+
GraphElementMapping::Relationship(rel_spec) => {
12001158
let mut src_label_analyzer =
12011159
DependentNodeLabelAnalyzer::new(&rel_spec, &rel_spec.source)?;
12021160
let mut tgt_label_analyzer =

0 commit comments

Comments
 (0)