Skip to content

Commit 9887565

Browse files
authored
feat: Neo4j vector index method support (#1111)
* feat: Neo4j vector index method support

  This commit introduces support for specifying vector index methods (HNSW) for Neo4j targets.

  - Modified `src/base/spec.rs` to derive `Eq` for `VectorIndexMethod`.
  - Modified `src/ops/targets/neo4j.rs` to:
    - Allow `VectorIndexMethod` to be passed to `IndexDef::from_vector_index_def`.
    - Store `VectorIndexMethod` in `IndexDef::VectorIndex`.
    - Implement error handling for unsupported `VectorIndexMethod` (IVFFlat).
    - Update `SetupComponentOperator::describe_state` to display the method.
    - Update `SetupComponentOperator::create` to include HNSW parameters in the Cypher query.

* feat: integrate Ollama for LLM tasks to enable on-premises testing

  - Updated docs_to_knowledge_graph example to use Ollama instead of OpenAI
  - Added alternative configuration comments for OpenAI usage
  - Enables end-to-end testing without requiring OpenAI API key
  - Supports the Neo4j vector index method implementation

* Simplify Neo4j vector index

* Fix: Refactor LLM usage and update Neo4j vector index

  This commit addresses feedback on the docs-to-knowledge-graph example and Neo4j vector index implementation.

  - Refactors the docs-to-knowledge-graph example to use Ollama by default, removing the explicit address parameter.
  - Updates the corresponding README to reflect the change to Ollama.
  - Removes the unused `_method` parameter from the `from_vector_index_def` function in the Neo4j target.
1 parent d22f2db commit 9887565

File tree

4 files changed

+45
-12
lines changed

4 files changed

+45
-12
lines changed

examples/docs_to_knowledge_graph/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ Please drop [Cocoindex on Github](https://github.com/cocoindex-io/cocoindex) a s
1616
* [Install Postgres](https://cocoindex.io/docs/getting_started/installation#-install-postgres) if you don't have one.
1717
* Install [Neo4j](https://cocoindex.io/docs/ops/targets#neo4j-dev-instance) or [Kuzu](https://cocoindex.io/docs/ops/targets#kuzu-dev-instance) if you don't have one.
1818
* The example uses Neo4j by default for now. If you want to use Kuzu, find the "SELECT ONE GRAPH DATABASE TO USE" section in the source code and switch the active branch.
19-
* [Configure your OpenAI API key](https://cocoindex.io/docs/ai/llm#openai).
19+
* Install / configure an LLM API. In this example we use Ollama, which runs LLM models locally. You need to get it ready by following [this guide](https://cocoindex.io/docs/ai/llm#ollama). Alternatively, you can follow the comments in the source code to switch to OpenAI, and [configure your OpenAI API key](https://cocoindex.io/docs/ai/llm#openai) before running the example.
2020

2121
## Documentation
2222
You can read the official CocoIndex Documentation for Property Graph Targets [here](https://cocoindex.io/docs/ops/targets#property-graph-targets).

examples/docs_to_knowledge_graph/main.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,14 @@ def docs_to_kg_flow(
8282
cocoindex.functions.ExtractByLlm(
8383
llm_spec=cocoindex.LlmSpec(
8484
# Supported LLM: https://cocoindex.io/docs/ai/llm
85-
api_type=cocoindex.LlmApiType.OPENAI,
86-
model="gpt-4o",
85+
api_type=cocoindex.LlmApiType.OLLAMA,
86+
model="llama3.2",
8787
),
88+
# Alternative: Use OpenAI API model instead of Ollama
89+
# llm_spec=cocoindex.LlmSpec(
90+
# api_type=cocoindex.LlmApiType.OPENAI,
91+
# model="gpt-4o",
92+
# ),
8893
output_type=DocumentSummary,
8994
instruction="Please summarize the content of the document.",
9095
)
@@ -100,9 +105,14 @@ def docs_to_kg_flow(
100105
cocoindex.functions.ExtractByLlm(
101106
llm_spec=cocoindex.LlmSpec(
102107
# Supported LLM: https://cocoindex.io/docs/ai/llm
103-
api_type=cocoindex.LlmApiType.OPENAI,
104-
model="gpt-4o",
108+
api_type=cocoindex.LlmApiType.OLLAMA,
109+
model="llama3.2",
105110
),
111+
# Alternative: Use OpenAI API model instead of Ollama
112+
# llm_spec=cocoindex.LlmSpec(
113+
# api_type=cocoindex.LlmApiType.OPENAI,
114+
# model="gpt-4o",
115+
# ),
106116
output_type=list[Relationship],
107117
instruction=(
108118
"Please extract relationships from CocoIndex documents. "

src/base/spec.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -384,7 +384,7 @@ impl fmt::Display for VectorSimilarityMetric {
384384
}
385385
}
386386

387-
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
387+
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
388388
#[serde(tag = "kind")]
389389
pub enum VectorIndexMethod {
390390
Hnsw {

src/ops/targets/neo4j.rs

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -568,9 +568,6 @@ impl SetupState {
568568
.map(|f| (f.name.as_str(), &f.value_type.typ))
569569
.collect::<HashMap<_, _>>();
570570
for index_def in index_options.vector_indexes.iter() {
571-
if index_def.method.is_some() {
572-
api_bail!("Vector index method is not configurable for Neo4j yet");
573-
}
574571
sub_components.push(ComponentState {
575572
object_label: schema.elem_type.clone(),
576573
index_def: IndexDef::from_vector_index_def(
@@ -644,6 +641,7 @@ enum IndexDef {
644641
field_name: String,
645642
metric: spec::VectorSimilarityMetric,
646643
vector_size: usize,
644+
method: Option<spec::VectorIndexMethod>,
647645
},
648646
}
649647

@@ -652,6 +650,10 @@ impl IndexDef {
652650
index_def: &spec::VectorIndexDef,
653651
field_typ: &schema::ValueType,
654652
) -> Result<Self> {
653+
let method = index_def.method.clone();
654+
if let Some(spec::VectorIndexMethod::IvfFlat { .. }) = method {
655+
api_bail!("IVFFlat vector index method is not supported for Neo4j");
656+
}
655657
Ok(Self::VectorIndex {
656658
field_name: index_def.field_name.clone(),
657659
vector_size: (match field_typ {
@@ -664,6 +666,7 @@ impl IndexDef {
664666
api_error!("Vector index field must be a vector with fixed dimension")
665667
})?,
666668
metric: index_def.metric,
669+
method,
667670
})
668671
}
669672
}
@@ -723,9 +726,14 @@ impl components::SetupOperator for SetupComponentOperator {
723726
field_name,
724727
metric,
725728
vector_size,
729+
method,
726730
} => {
731+
let method_str = method
732+
.as_ref()
733+
.map(|m| format!(", method: {}", m))
734+
.unwrap_or_default();
727735
format!(
728-
"{key_desc} ON {label} (field_name: {field_name}, vector_size: {vector_size}, metric: {metric})",
736+
"{key_desc} ON {label} (field_name: {field_name}, vector_size: {vector_size}, metric: {metric}{method_str})",
729737
)
730738
}
731739
}
@@ -752,17 +760,32 @@ impl components::SetupOperator for SetupComponentOperator {
752760
field_name,
753761
metric,
754762
vector_size,
763+
method,
755764
} => {
765+
let mut parts = vec![];
766+
767+
parts.push(format!("`vector.dimensions`: {}", vector_size));
768+
parts.push(format!("`vector.similarity_function`: '{}'", metric));
769+
770+
if let Some(spec::VectorIndexMethod::Hnsw { m, ef_construction }) = method {
771+
if let Some(m_val) = m {
772+
parts.push(format!("`vector.hnsw.m`: {}", m_val));
773+
}
774+
if let Some(ef_val) = ef_construction {
775+
parts.push(format!("`vector.hnsw.ef_construction`: {}", ef_val));
776+
}
777+
}
778+
756779
formatdoc! {"
757780
CREATE VECTOR INDEX {name} IF NOT EXISTS
758781
FOR {matcher} ON {qualifier}.{field_name}
759782
OPTIONS {{
760783
indexConfig: {{
761-
`vector.dimensions`: {vector_size},
762-
`vector.similarity_function`: '{metric}'
784+
{config}
763785
}}
764786
}}",
765787
name = key.name,
788+
config = parts.join(", ")
766789
}
767790
}
768791
});

0 commit comments

Comments
 (0)