Skip to content

Commit de9d6e4

Browse files
authored
Merge branch 'main' into patch-1
2 parents 7ebca9b + 03ddb3c commit de9d6e4

File tree

18 files changed

+172
-81
lines changed

18 files changed

+172
-81
lines changed

.github/workflows/pr-e2e-tests.yaml

Lines changed: 50 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -18,36 +18,6 @@ jobs:
1818
python-version: ['3.9', '3.13']
1919
neo4j-tag:
2020
- 'latest'
21-
services:
22-
t2v-transformers:
23-
image: cr.weaviate.io/semitechnologies/transformers-inference:sentence-transformers-all-MiniLM-L6-v2
24-
env:
25-
ENABLE_CUDA: '0'
26-
weaviate:
27-
image: cr.weaviate.io/semitechnologies/weaviate:1.25.1
28-
env:
29-
TRANSFORMERS_INFERENCE_API: 'http://t2v-transformers:8080'
30-
AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true'
31-
DEFAULT_VECTORIZER_MODULE: 'text2vec-transformers'
32-
ENABLE_MODULES: 'text2vec-transformers'
33-
CLUSTER_HOSTNAME: 'node1'
34-
ports:
35-
- 8080:8080
36-
- 50051:50051
37-
neo4j:
38-
image: neo4j:${{ matrix.neo4j-tag }}
39-
env:
40-
NEO4J_AUTH: neo4j/password
41-
NEO4J_ACCEPT_LICENSE_AGREEMENT: 'eval'
42-
NEO4J_PLUGINS: '["apoc"]'
43-
ports:
44-
- 7687:7687
45-
- 7474:7474
46-
qdrant:
47-
image: qdrant/qdrant
48-
ports:
49-
- 6333:6333
50-
5121
steps:
5222
- name: Check out repository code
5323
uses: actions/checkout@v4
@@ -62,6 +32,40 @@ jobs:
6232
docker builder prune -af || true
6333
sudo apt-get clean || true
6434
df -h
35+
- name: Create Docker network
36+
run: docker network create test-network
37+
- name: Start t2v-transformers
38+
run: |
39+
docker run -d --name t2v-transformers \
40+
--network test-network \
41+
-e ENABLE_CUDA=0 \
42+
cr.weaviate.io/semitechnologies/transformers-inference:sentence-transformers-all-MiniLM-L6-v2
43+
- name: Start Weaviate
44+
run: |
45+
docker run -d --name weaviate \
46+
--network test-network \
47+
-p 8080:8080 -p 50051:50051 \
48+
-e TRANSFORMERS_INFERENCE_API='http://t2v-transformers:8080' \
49+
-e AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true' \
50+
-e DEFAULT_VECTORIZER_MODULE='text2vec-transformers' \
51+
-e ENABLE_MODULES='text2vec-transformers' \
52+
-e CLUSTER_HOSTNAME='node1' \
53+
cr.weaviate.io/semitechnologies/weaviate:1.25.1
54+
- name: Start Neo4j
55+
run: |
56+
docker run -d --name neo4j \
57+
--network test-network \
58+
-p 7687:7687 -p 7474:7474 \
59+
-e NEO4J_AUTH=neo4j/password \
60+
-e NEO4J_ACCEPT_LICENSE_AGREEMENT=eval \
61+
-e NEO4J_PLUGINS='["apoc"]' \
62+
neo4j:${{ matrix.neo4j-tag }}
63+
- name: Start Qdrant
64+
run: |
65+
docker run -d --name qdrant \
66+
--network test-network \
67+
-p 6333:6333 \
68+
qdrant/qdrant
6569
- name: Set up Python ${{ matrix.python-version }}
6670
uses: actions/setup-python@v5
6771
with:
@@ -93,6 +97,22 @@ jobs:
9397
run: |
9498
set +e
9599
count=0; until curl -s --fail localhost:8080/v1/.well-known/ready; do ((count++)); [ $count -ge 10 ] && echo "Reached maximum retry limit" && exit 1; sleep 15; done
100+
- name: Wait for Neo4j to be ready
101+
shell: bash
102+
run: |
103+
echo "Waiting for Neo4j to be ready..."
104+
count=0
105+
until curl -s --fail http://localhost:7474 > /dev/null 2>&1; do
106+
((count++))
107+
if [ $count -ge 30 ]; then
108+
echo "Neo4j failed to start within timeout"
109+
docker logs neo4j
110+
exit 1
111+
fi
112+
echo "Waiting for Neo4j... (attempt $count/30)"
113+
sleep 5
114+
done
115+
echo "Neo4j is ready!"
96116
- name: Run tests
97117
shell: bash
98118
run: |

.github/workflows/scheduled-e2e-tests.yaml

Lines changed: 51 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ on:
77
push:
88
branches:
99
- main
10-
10+
1111
concurrency:
1212
group: ${{ github.workflow }}-${{ github.ref_name }}
1313
cancel-in-progress: true
@@ -22,45 +22,6 @@ jobs:
2222
- '5-community'
2323
- '5-enterprise'
2424
- 'latest'
25-
services:
26-
t2v-transformers:
27-
image: cr.weaviate.io/semitechnologies/transformers-inference:sentence-transformers-all-MiniLM-L6-v2
28-
env:
29-
ENABLE_CUDA: '0'
30-
credentials:
31-
username: ${{ secrets.DOCKERHUB_USERNAME }}
32-
password: ${{ secrets.DOCKERHUB_TOKEN }}
33-
weaviate:
34-
image: cr.weaviate.io/semitechnologies/weaviate:1.25.1
35-
env:
36-
TRANSFORMERS_INFERENCE_API: 'http://t2v-transformers:8080'
37-
AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true'
38-
DEFAULT_VECTORIZER_MODULE: 'text2vec-transformers'
39-
ENABLE_MODULES: 'text2vec-transformers'
40-
CLUSTER_HOSTNAME: 'node1'
41-
ports:
42-
- 8080:8080
43-
- 50051:50051
44-
credentials:
45-
username: ${{ secrets.DOCKERHUB_USERNAME }}
46-
password: ${{ secrets.DOCKERHUB_TOKEN }}
47-
neo4j:
48-
image: neo4j:${{ matrix.neo4j-tag }}
49-
env:
50-
NEO4J_AUTH: neo4j/password
51-
NEO4J_ACCEPT_LICENSE_AGREEMENT: 'eval'
52-
NEO4J_PLUGINS: '["apoc"]'
53-
ports:
54-
- 7687:7687
55-
- 7474:7474
56-
credentials:
57-
username: ${{ secrets.DOCKERHUB_USERNAME }}
58-
password: ${{ secrets.DOCKERHUB_TOKEN }}
59-
qdrant:
60-
image: qdrant/qdrant
61-
ports:
62-
- 6333:6333
63-
6425
steps:
6526
- name: Check out repository code
6627
uses: actions/checkout@v4
@@ -75,6 +36,40 @@ jobs:
7536
docker builder prune -af || true
7637
sudo apt-get clean || true
7738
df -h
39+
- name: Create Docker network
40+
run: docker network create test-network
41+
- name: Start t2v-transformers
42+
run: |
43+
docker run -d --name t2v-transformers \
44+
--network test-network \
45+
-e ENABLE_CUDA=0 \
46+
cr.weaviate.io/semitechnologies/transformers-inference:sentence-transformers-all-MiniLM-L6-v2
47+
- name: Start Weaviate
48+
run: |
49+
docker run -d --name weaviate \
50+
--network test-network \
51+
-p 8080:8080 -p 50051:50051 \
52+
-e TRANSFORMERS_INFERENCE_API='http://t2v-transformers:8080' \
53+
-e AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true' \
54+
-e DEFAULT_VECTORIZER_MODULE='text2vec-transformers' \
55+
-e ENABLE_MODULES='text2vec-transformers' \
56+
-e CLUSTER_HOSTNAME='node1' \
57+
cr.weaviate.io/semitechnologies/weaviate:1.25.1
58+
- name: Start Neo4j
59+
run: |
60+
docker run -d --name neo4j \
61+
--network test-network \
62+
-p 7687:7687 -p 7474:7474 \
63+
-e NEO4J_AUTH=neo4j/password \
64+
-e NEO4J_ACCEPT_LICENSE_AGREEMENT=eval \
65+
-e NEO4J_PLUGINS='["apoc"]' \
66+
neo4j:${{ matrix.neo4j-tag }}
67+
- name: Start Qdrant
68+
run: |
69+
docker run -d --name qdrant \
70+
--network test-network \
71+
-p 6333:6333 \
72+
qdrant/qdrant
7873
- name: Set up Python ${{ matrix.python-version }}
7974
uses: actions/setup-python@v5
8075
with:
@@ -106,6 +101,22 @@ jobs:
106101
run: |
107102
set +e
108103
count=0; until curl -s --fail localhost:8080/v1/.well-known/ready; do ((count++)); [ $count -ge 10 ] && echo "Reached maximum retry limit" && exit 1; sleep 15; done
104+
- name: Wait for Neo4j to be ready
105+
shell: bash
106+
run: |
107+
echo "Waiting for Neo4j to be ready..."
108+
count=0
109+
until curl -s --fail http://localhost:7474 > /dev/null 2>&1; do
110+
((count++))
111+
if [ $count -ge 30 ]; then
112+
echo "Neo4j failed to start within timeout"
113+
docker logs neo4j
114+
exit 1
115+
fi
116+
echo "Waiting for Neo4j... (attempt $count/30)"
117+
sleep 5
118+
done
119+
echo "Neo4j is ready!"
109120
- name: Run tests
110121
shell: bash
111122
run: |

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@
22

33
## Next
44

5+
### Added
6+
7+
- Added an optional `node_label_neo4j` parameter to the external retrievers to speed up the search query in Neo4j.
8+
9+
510
## 1.10.1
611

712
### Added

docs/source/user_guide_rag.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -762,6 +762,7 @@ Weaviate Retrievers
762762
collection="Movies",
763763
id_property_external="neo4j_id",
764764
id_property_neo4j="id",
765+
node_label_neo4j="Document", # optional
765766
)
766767
767768
Internally, this retriever performs the vector search in Weaviate, finds the corresponding node by matching
@@ -795,6 +796,7 @@ Pinecone Retrievers
795796
index_name="Movies",
796797
id_property_neo4j="id",
797798
embedder=embedder,
799+
node_label_neo4j="Document", # optional
798800
)
799801
800802
Also see :ref:`pineconeneo4jretriever`.
@@ -825,6 +827,7 @@ Qdrant Retrievers
825827
id_property_external="neo4j_id", # The payload field that contains the identifier of the corresponding Neo4j node id property
826828
id_property_neo4j="id",
827829
embedder=embedder,
830+
node_label_neo4j="Document", # optional
828831
)
829832
830833
See :ref:`qdrantneo4jretriever`.

src/neo4j_graphrag/retrievers/base.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -454,10 +454,12 @@ def __init__(
454454
id_property_external: str,
455455
id_property_neo4j: str,
456456
neo4j_database: Optional[str] = None,
457+
node_label_neo4j: Optional[str] = None,
457458
):
458459
super().__init__(driver)
459460
self.id_property_external = id_property_external
460461
self.id_property_neo4j = id_property_neo4j
462+
self.node_label_neo4j = node_label_neo4j
461463
self.neo4j_database = neo4j_database
462464

463465
@abstractmethod

src/neo4j_graphrag/retrievers/external/pinecone/pinecone.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ class PineconeNeo4jRetriever(ExternalRetriever):
8383
retrieval_query (str): Cypher query that gets appended.
8484
result_formatter (Optional[Callable[[neo4j.Record], RetrieverResultItem]]): Function to transform a neo4j.Record to a RetrieverResultItem.
8585
neo4j_database (Optional[str]): The name of the Neo4j database. If not provided, this defaults to the server's default database ("neo4j" by default) (`see reference to documentation <https://neo4j.com/docs/operations-manual/current/database-administration/#manage-databases-default>`_).
86+
node_label_neo4j (Optional[str]): The label of the Neo4j node to retrieve. This label must be properly escaped if needed, e.g. "`Label with spaces`".
8687
8788
Raises:
8889
RetrieverInitializationError: If validation of the input arguments fails.
@@ -101,6 +102,7 @@ def __init__(
101102
Callable[[neo4j.Record], RetrieverResultItem]
102103
] = None,
103104
neo4j_database: Optional[str] = None,
105+
node_label_neo4j: Optional[str] = None,
104106
):
105107
try:
106108
driver_model = Neo4jDriverModel(driver=driver)
@@ -116,6 +118,7 @@ def __init__(
116118
retrieval_query=retrieval_query,
117119
result_formatter=result_formatter,
118120
neo4j_database=neo4j_database,
121+
node_label_neo4j=node_label_neo4j,
119122
)
120123
except ValidationError as e:
121124
raise RetrieverInitializationError(e.errors()) from e
@@ -125,6 +128,7 @@ def __init__(
125128
id_property_external="id",
126129
id_property_neo4j=validated_data.id_property_neo4j,
127130
neo4j_database=neo4j_database,
131+
node_label_neo4j=node_label_neo4j,
128132
)
129133
self.driver = validated_data.driver_model.driver
130134
self.client = validated_data.client_model.client
@@ -172,7 +176,8 @@ def get_search_results(
172176
driver=neo4j_driver,
173177
client=pc_client,
174178
index_name="jeopardy",
175-
id_property_neo4j="id"
179+
id_property_neo4j="id",
180+
node_label_neo4j="Document",
176181
)
177182
biology_embedding = ...
178183
retriever.search(query_vector=biology_embedding, top_k=2)
@@ -223,6 +228,7 @@ def get_search_results(
223228
search_query = get_match_query(
224229
return_properties=self.return_properties,
225230
retrieval_query=self.retrieval_query,
231+
node_label=self.node_label_neo4j,
226232
)
227233

228234
parameters = {

src/neo4j_graphrag/retrievers/external/pinecone/types.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,3 +59,4 @@ class PineconeNeo4jRetrieverModel(BaseModel):
5959
retrieval_query: Optional[str] = None
6060
result_formatter: Optional[Callable[[neo4j.Record], RetrieverResultItem]] = None
6161
neo4j_database: Optional[str] = None
62+
node_label_neo4j: Optional[str] = None

src/neo4j_graphrag/retrievers/external/qdrant/qdrant.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ class QdrantNeo4jRetriever(ExternalRetriever):
7979
return_properties (Optional[list[str]]): List of node properties to return.
8080
result_formatter (Optional[Callable[[neo4j.Record], RetrieverResultItem]]): Function to transform a neo4j.Record to a RetrieverResultItem.
8181
neo4j_database (Optional[str]): The name of the Neo4j database. If not provided, this defaults to the server's default database ("neo4j" by default) (`see reference to documentation <https://neo4j.com/docs/operations-manual/current/database-administration/#manage-databases-default>`_).
82+
node_label_neo4j (Optional[str]): The label of the Neo4j node to retrieve. This label must be properly escaped if needed, e.g. "`Label with spaces`".
8283
8384
Raises:
8485
RetrieverInitializationError: If validation of the input arguments fails.
@@ -99,6 +100,7 @@ def __init__(
99100
Callable[[neo4j.Record], RetrieverResultItem]
100101
] = None,
101102
neo4j_database: Optional[str] = None,
103+
node_label_neo4j: Optional[str] = None,
102104
):
103105
try:
104106
driver_model = Neo4jDriverModel(driver=driver)
@@ -116,6 +118,7 @@ def __init__(
116118
retrieval_query=retrieval_query,
117119
result_formatter=result_formatter,
118120
neo4j_database=neo4j_database,
121+
node_label_neo4j=node_label_neo4j,
119122
)
120123
except ValidationError as e:
121124
raise RetrieverInitializationError(e.errors()) from e
@@ -125,6 +128,7 @@ def __init__(
125128
id_property_external=validated_data.id_property_external,
126129
id_property_neo4j=validated_data.id_property_neo4j,
127130
neo4j_database=neo4j_database,
131+
node_label_neo4j=node_label_neo4j,
128132
)
129133
self.driver = validated_data.driver_model.driver
130134
self.client = validated_data.client_model.client
@@ -169,7 +173,8 @@ def get_search_results(
169173
driver=neo4j_driver,
170174
client=client,
171175
collection_name="my_collection",
172-
id_property_external="neo4j_id"
176+
id_property_external="neo4j_id",
177+
node_label_neo4j="Document",
173178
)
174179
embedding = ...
175180
retriever.search(query_vector=embedding, top_k=2)
@@ -223,6 +228,7 @@ def get_search_results(
223228
search_query = get_match_query(
224229
return_properties=self.return_properties,
225230
retrieval_query=self.retrieval_query,
231+
node_label=self.node_label_neo4j,
226232
)
227233

228234
parameters = {

src/neo4j_graphrag/retrievers/external/qdrant/types.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,3 +54,4 @@ class QdrantNeo4jRetrieverModel(BaseModel):
5454
retrieval_query: Optional[str] = None
5555
result_formatter: Optional[Callable[[neo4j.Record], RetrieverResultItem]] = None
5656
neo4j_database: Optional[str] = None
57+
node_label_neo4j: Optional[str] = None

src/neo4j_graphrag/retrievers/external/utils.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,18 @@
2020

2121

2222
def get_match_query(
23-
return_properties: Optional[list[str]] = None, retrieval_query: Optional[str] = None
23+
return_properties: Optional[list[str]] = None,
24+
retrieval_query: Optional[str] = None,
25+
node_label: Optional[str] = None,
2426
) -> str:
27+
# node_label is not escaped on purpose, allowing users to use any valid
28+
# node label expression, e.g. "Actor|Director". It's up to the user to ensure
29+
# labels are properly escaped, e.g. "`My label with space`".
30+
node_label_expression = f":{node_label}" if node_label else ""
2531
match_query = (
2632
"UNWIND $match_params AS match_param "
2733
"WITH match_param[0] AS match_id_value, match_param[1] AS score "
28-
"MATCH (node) "
34+
f"MATCH (node{node_label_expression}) "
2935
"WHERE node[$id_property] = match_id_value "
3036
)
3137
return match_query + get_query_tail(

0 commit comments

Comments
 (0)