Skip to content

Commit 3b1b1da

Browse files
committed
feat(kuzu): support kuzu as target
1 parent 3797c84 commit 3b1b1da

File tree

11 files changed

+774
-121
lines changed

11 files changed

+774
-121
lines changed

docs/docs/core/settings.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ description: Provide settings for CocoIndex, e.g. database connection, app names
66
import Tabs from '@theme/Tabs';
77
import TabItem from '@theme/TabItem';
88

9-
# CocoIndex Settings
9+
# CocoIndex Setting
1010

1111
Certain settings need to be provided for CocoIndex to work, e.g. database connections, app namespace, etc.
1212

examples/docs_to_knowledge_graph/main.py

Lines changed: 32 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -42,21 +42,22 @@ class Relationship:
4242
)
4343

4444
# Use Neo4j as the graph database
45-
# GraphDbSpec = cocoindex.storages.Neo4j
46-
# GraphDbConnection = cocoindex.storages.Neo4jConnection
47-
# GraphDbDeclaration = cocoindex.storages.Neo4jDeclaration
48-
# conn_spec = neo4j_conn_spec
45+
GraphDbSpec = cocoindex.storages.Neo4j
46+
GraphDbConnection = cocoindex.storages.Neo4jConnection
47+
GraphDbDeclaration = cocoindex.storages.Neo4jDeclaration
48+
conn_spec = neo4j_conn_spec
4949

50-
GraphDbSpec = cocoindex.storages.Kuzu
51-
GraphDbConnection = cocoindex.storages.KuzuConnection
52-
GraphDbDeclaration = cocoindex.storages.KuzuDeclaration
53-
conn_spec = kuzu_conn_spec
50+
# Use Kuzu as the graph database
51+
# GraphDbSpec = cocoindex.storages.Kuzu
52+
# GraphDbConnection = cocoindex.storages.KuzuConnection
53+
# GraphDbDeclaration = cocoindex.storages.KuzuDeclaration
54+
# conn_spec = kuzu_conn_spec
5455

5556

5657
@cocoindex.flow_def(name="DocsToKG")
5758
def docs_to_kg_flow(
5859
flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope
59-
):
60+
) -> None:
6061
"""
6162
Define an example flow that extracts relationship from files and build knowledge graph.
6263
"""
@@ -168,25 +169,25 @@ def docs_to_kg_flow(
168169
),
169170
primary_key_fields=["id"],
170171
)
171-
# entity_mention.export(
172-
# "entity_mention",
173-
# GraphDbSpec(
174-
# connection=conn_spec,
175-
# mapping=cocoindex.storages.Relationships(
176-
# rel_type="MENTION",
177-
# source=cocoindex.storages.NodeFromFields(
178-
# label="Document",
179-
# fields=[cocoindex.storages.TargetFieldMapping("filename")],
180-
# ),
181-
# target=cocoindex.storages.NodeFromFields(
182-
# label="Entity",
183-
# fields=[
184-
# cocoindex.storages.TargetFieldMapping(
185-
# source="entity", target="value"
186-
# )
187-
# ],
188-
# ),
189-
# ),
190-
# ),
191-
# primary_key_fields=["id"],
192-
# )
172+
entity_mention.export(
173+
"entity_mention",
174+
GraphDbSpec(
175+
connection=conn_spec,
176+
mapping=cocoindex.storages.Relationships(
177+
rel_type="MENTION",
178+
source=cocoindex.storages.NodeFromFields(
179+
label="Document",
180+
fields=[cocoindex.storages.TargetFieldMapping("filename")],
181+
),
182+
target=cocoindex.storages.NodeFromFields(
183+
label="Entity",
184+
fields=[
185+
cocoindex.storages.TargetFieldMapping(
186+
source="entity", target="value"
187+
)
188+
],
189+
),
190+
),
191+
),
192+
primary_key_fields=["id"],
193+
)

examples/product_recommendation/main.py

Lines changed: 33 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -77,10 +77,38 @@ def extract_product_info(product: cocoindex.Json, filename: str) -> ProductInfo:
7777
)
7878

7979

80+
neo4j_conn_spec = cocoindex.add_auth_entry(
81+
"Neo4jConnection",
82+
cocoindex.storages.Neo4jConnection(
83+
uri="bolt://localhost:7687",
84+
user="neo4j",
85+
password="cocoindex",
86+
),
87+
)
88+
kuzu_conn_spec = cocoindex.add_auth_entry(
89+
"KuzuConnection",
90+
cocoindex.storages.KuzuConnection(
91+
api_server_url="http://localhost:8123",
92+
),
93+
)
94+
95+
# Use Neo4j as the graph database
96+
GraphDbSpec = cocoindex.storages.Neo4j
97+
GraphDbConnection = cocoindex.storages.Neo4jConnection
98+
GraphDbDeclaration = cocoindex.storages.Neo4jDeclaration
99+
conn_spec = neo4j_conn_spec
100+
101+
# Use Kuzu as the graph database
102+
# GraphDbSpec = cocoindex.storages.Kuzu
103+
# GraphDbConnection = cocoindex.storages.KuzuConnection
104+
# GraphDbDeclaration = cocoindex.storages.KuzuDeclaration
105+
# conn_spec = kuzu_conn_spec
106+
107+
80108
@cocoindex.flow_def(name="StoreProduct")
81109
def store_product_flow(
82110
flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope
83-
):
111+
) -> None:
84112
"""
85113
Define an example flow that extracts triples from files and build knowledge graph.
86114
"""
@@ -122,25 +150,16 @@ def store_product_flow(
122150
taxonomy=t["name"],
123151
)
124152

125-
conn_spec = cocoindex.add_auth_entry(
126-
"Neo4jConnection",
127-
cocoindex.storages.Neo4jConnection(
128-
uri="bolt://localhost:7687",
129-
user="neo4j",
130-
password="cocoindex",
131-
),
132-
)
133-
134153
product_node.export(
135154
"product_node",
136-
cocoindex.storages.Neo4j(
155+
GraphDbSpec(
137156
connection=conn_spec, mapping=cocoindex.storages.Nodes(label="Product")
138157
),
139158
primary_key_fields=["id"],
140159
)
141160

142161
flow_builder.declare(
143-
cocoindex.storages.Neo4jDeclaration(
162+
GraphDbDeclaration(
144163
connection=conn_spec,
145164
nodes_label="Taxonomy",
146165
primary_key_fields=["value"],
@@ -149,7 +168,7 @@ def store_product_flow(
149168

150169
product_taxonomy.export(
151170
"product_taxonomy",
152-
cocoindex.storages.Neo4j(
171+
GraphDbSpec(
153172
connection=conn_spec,
154173
mapping=cocoindex.storages.Relationships(
155174
rel_type="PRODUCT_TAXONOMY",
@@ -175,7 +194,7 @@ def store_product_flow(
175194
)
176195
product_complementary_taxonomy.export(
177196
"product_complementary_taxonomy",
178-
cocoindex.storages.Neo4j(
197+
GraphDbSpec(
179198
connection=conn_spec,
180199
mapping=cocoindex.storages.Relationships(
181200
rel_type="PRODUCT_COMPLEMENTARY_TAXONOMY",

history.txt

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
RETURN 1;
2+
RETURN "abc";
3+
RETURN "ab\nc";
4+
RETURN "ab\n\\c";
5+
RETURN "ab\n\\\x20c";
6+
RETURN "ab\n\\c";
7+
RETURN "ab\u000Ac";
8+
RETURN "ab\u0Ac";
9+
RETURN "ab\u000Ac";
10+
RETURN "ab\u00000Ac";
11+
RETURN "ab\u000Ac";
12+
RETURN "弱智";
13+
RETURN "a\"b";
14+
RETURN "a\
15+
b"
16+
17+
;
18+
RETURN true;
19+
RETURN BLOB("\x20")
20+
;
21+
RETURN BLOB("\x20");
22+
RETURN BLOB("\\x20");
23+
RETURN BLOB("\\x20\\30");
24+
RETURN BLOB("\\x20\\x30");
25+
RETURN BLOB("\\x200");
26+
RETURN BLOB("\\x200");RETURN BLOB('\\xBC\\xBD\\xBA\\xAA') as result;
27+
RETURN BLOB('\\xBC\\xBD\\xBA\\xAA') as result;
28+
RETURN "\\u0030";
29+
RETURN "\\\u0030";
30+
RETURN "\u0030";
31+
RETURN UINT64[2]([0,1]);
32+
RETURN CAST(UINT64[2], [0,1]);
33+
RETURN CAST([0,1], UINT64[2]);
34+
RETURN CAST([0,1], 'UINT64[2]');
35+
RETURN [[1,2],[3,4,5]];
36+
RETURN [CAST([1,2], "UINT64[2]"),[3,4]];
37+
RETURN [CAST([1,2], "UINT64[2]"),[3,4,6]];
38+
RETURN [CAST([1,2], "UINT64[2]"),[3,4,"d"]];
39+
RETURN [CAST([1,2], "UINT64[2]"),[3,4]];

src/base/schema.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ impl std::fmt::Display for FieldSchema {
342342
pub struct CollectorSchema {
343343
pub fields: Vec<FieldSchema>,
344344
/// If specified, the collector will have an automatically generated UUID field with the given index.
345-
pub auto_uuid_field_idx: Option<u32>,
345+
pub auto_uuid_field_idx: Option<usize>,
346346
}
347347

348348
impl std::fmt::Display for CollectorSchema {

src/base/value.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use super::schema::*;
22
use crate::base::duration::parse_duration;
3+
use crate::prelude::invariance_violation;
34
use crate::{api_bail, api_error};
45
use anyhow::Result;
56
use base64::prelude::*;
@@ -175,6 +176,26 @@ impl std::fmt::Display for KeyValue {
175176
}
176177

177178
impl KeyValue {
179+
pub fn from_json(value: serde_json::Value, fields_schema: &[FieldSchema]) -> Result<Self> {
180+
let value = if fields_schema.len() == 1 {
181+
Value::from_json(value, &fields_schema[0].value_type.typ)?
182+
} else {
183+
let field_values: FieldValues = FieldValues::from_json(value, fields_schema)?;
184+
Value::Struct(field_values)
185+
};
186+
Ok(value.as_key()?)
187+
}
188+
189+
pub fn from_values<'a>(values: impl ExactSizeIterator<Item = &'a Value>) -> Result<Self> {
190+
let key = if values.len() == 1 {
191+
let mut values = values;
192+
values.next().ok_or_else(invariance_violation)?.as_key()?
193+
} else {
194+
KeyValue::Struct(values.map(|v| v.as_key()).collect::<Result<Vec<_>>>()?)
195+
};
196+
Ok(key)
197+
}
198+
178199
pub fn fields_iter(&self, num_fields: usize) -> Result<impl Iterator<Item = &KeyValue>> {
179200
let slice = if num_fields == 1 {
180201
std::slice::from_ref(self)

src/builder/analyzer.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -987,7 +987,6 @@ impl AnalyzerContext<'_> {
987987
.fields
988988
.iter()
989989
.position(|field| &field.name == f)
990-
.map(|idx| idx as u32)
991990
.ok_or_else(|| anyhow!("field not found: {}", f))
992991
})
993992
.collect::<Result<Vec<_>>>()?;
@@ -1007,7 +1006,7 @@ impl AnalyzerContext<'_> {
10071006
let mut value_fields_schema: Vec<FieldSchema> = vec![];
10081007
let mut value_fields_idx = vec![];
10091008
for (idx, field) in collector_schema.fields.iter().enumerate() {
1010-
if !pk_fields_idx.contains(&(idx as u32)) {
1009+
if !pk_fields_idx.contains(&idx) {
10111010
value_fields_schema.push(field.clone());
10121011
value_fields_idx.push(idx as u32);
10131012
}

src/builder/plan.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ pub struct AnalyzedCollectOp {
9494
}
9595

9696
pub enum AnalyzedPrimaryKeyDef {
97-
Fields(Vec<u32>),
97+
Fields(Vec<usize>),
9898
}
9999

100100
pub struct AnalyzedExportOp {

src/execution/row_indexer.rs

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -24,20 +24,11 @@ pub fn extract_primary_key(
2424
primary_key_def: &AnalyzedPrimaryKeyDef,
2525
record: &FieldValues,
2626
) -> Result<KeyValue> {
27-
let key = match primary_key_def {
27+
match primary_key_def {
2828
AnalyzedPrimaryKeyDef::Fields(fields) => {
29-
if fields.len() == 1 {
30-
record.fields[fields[0] as usize].as_key()?
31-
} else {
32-
let mut key_values = Vec::with_capacity(fields.len());
33-
for field in fields.iter() {
34-
key_values.push(record.fields[*field as usize].as_key()?);
35-
}
36-
KeyValue::Struct(key_values)
37-
}
29+
KeyValue::from_values(fields.iter().map(|field| &record.fields[*field as usize]))
3830
}
39-
};
40-
Ok(key)
31+
}
4132
}
4233

4334
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default)]

0 commit comments

Comments
 (0)