Skip to content

Commit 00ab5a8

Browse files
Add e2e graph correctness testing structure (#233)
1 parent 905e31f commit 00ab5a8

File tree

7 files changed

+615
-14
lines changed

7 files changed

+615
-14
lines changed

.github/workflows/test.yml

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@ env:
44
PY_VERSION: "3.9"
55
CORE_COUNT: "8"
66
MAGE_CONTAINER: "mage"
7+
MEMGRAPH_PORT: 7687
8+
NEO4J_PORT: 7688
9+
NEO4J_CONTAINER: "neo4j_test"
710

811
on: [pull_request, workflow_dispatch]
912

@@ -57,7 +60,7 @@ jobs:
5760
5861
- name: Run Memgraph MAGE:prod image
5962
run: |
60-
docker run -d -p 7687:7687 --name ${{ env.MAGE_CONTAINER }} memgraph-mage:${{ matrix.target }} --telemetry-enabled=False
63+
docker run -d -p ${{ env.MEMGRAPH_PORT }}:7687 --name ${{ env.MAGE_CONTAINER }} memgraph-mage:${{ matrix.target }} --telemetry-enabled=False
6164
6265
- name: Set up C++
6366
run: |
@@ -108,3 +111,23 @@ jobs:
108111
run: |
109112
cd e2e
110113
python -m pytest . -k 'not cugraph'
114+
115+
# Starts a throwaway Neo4j container with APOC, runs the correctness tests
# against both databases, then stops the container.
- name: Run End-to-end correctness tests
  if: matrix.architecture != 'arm64'
  env:
    # `env` values are passed verbatim (no shell expansion), so "$PWD/e2e"
    # would reach Python literally; build the path from the workspace context.
    PYTHONPATH: "${{ github.workspace }}/e2e"
  # `--rm` is passed once (it was duplicated), and the plugin list is
  # single-quoted so the shell keeps the inner double quotes and the value
  # stays valid JSON: ["apoc"].
  run: |
    docker run --rm \
      --name ${{ env.NEO4J_CONTAINER}} \
      -p 7474:7474 -p ${{ env.NEO4J_PORT }}:7687 \
      -d \
      -v $HOME/neo4j/plugins:/plugins \
      --env NEO4J_AUTH=none \
      -e NEO4J_apoc_export_file_enabled=true \
      -e NEO4J_apoc_import_file_enabled=true \
      -e NEO4J_apoc_import_file_use__neo4j__config=true \
      -e NEO4JLABS_PLUGINS='["apoc"]' neo4j:5.10.0
    sleep 5
    python3 test_e2e_correctness.py --memgraph-port ${{ env.MEMGRAPH_PORT }} --neo4j-port ${{ env.NEO4J_PORT }}
    docker stop ${{ env.NEO4J_CONTAINER}}

e2e_correctness/conftest.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
2+
3+
def pytest_addoption(parser):
    """
    Register the integer command-line options carrying the database ports.
    """
    for option_name in ("--memgraph-port", "--neo4j-port"):
        parser.addoption(option_name, type=int, action="store")

e2e_correctness/pytest.ini

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
[pytest]
2+
addopts =
3+
-vv --quiet --maxfail=10
4+
python_files = test_modules.py
5+
testpaths = .

e2e_correctness/query_neo_mem.py

Lines changed: 275 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,275 @@
1+
"""
2+
This module queries Memgraph and Neo4j and creates Graph from JSON exported from Memgraph and
3+
JSON from APOC from Neo4j
4+
5+
As of 17.7.2023, when data is imported via Cypherl, new IDs are given to each node in Memgraph and Neo4j.
6+
7+
When exporting data, Memgraph's export_util uses internal Memgraph IDs.
8+
9+
To overcome the issue of different internal IDs in Neo4j and Memgraph, we use the `id` node property as identifier.
10+
11+
A workaround would be to add an API for creating nodes by ID in Memgraph when importing via import_util.
12+
"""
13+
14+
import gqlalchemy
15+
import json
16+
import logging
17+
import neo4j
18+
import re
19+
20+
from typing import Any, Dict, List
21+
22+
# Message layout handed to the root logging configuration.
_LOG_FORMAT = "%(asctime)-15s [%(levelname)s]: %(message)s"
logging.basicConfig(format=_LOG_FORMAT)

# Logger shared by everything in this module; its level is set to DEBUG.
logger = logging.getLogger("query_neo_mem")
logger.setLevel(logging.DEBUG)
25+
26+
27+
class Vertex:
    """
    Graph node described by an id, a list of labels and a property map.

    Labels are stored sorted, so two vertices built from the same labels in a
    different order compare equal.
    """

    def __init__(self, id: int, labels: List[str], properties: Dict[str, Any]):
        self._id = id
        # Sort a copy: the caller's list must not be reordered as a side effect.
        self._labels = sorted(labels)
        self._properties = properties

    @property
    def id(self) -> int:
        """Identifier this vertex was constructed with."""
        return self._id

    def __str__(self) -> str:
        return f"Vertex: {self._id}, {self._labels}, {self._properties}"

    def __eq__(self, other):
        """
        Compare id, labels and properties, logging the first difference found.

        Returns NotImplemented for objects that are not a Vertex, so Python
        falls back to its default comparison instead of failing.
        """
        if not isinstance(other, Vertex):
            return NotImplemented
        logger.debug(f"comparing Vertex with {self._id} to {other._id}")
        if self._id != other._id:
            logger.debug(f"_id different: {self._id} vs {other._id}")
            return False
        if self._labels != other._labels:
            logger.debug(
                f"_labels different between {self._id} and {other._id}: {self._labels} vs {other._labels}"
            )
            return False
        for k, v in self._properties.items():
            if k not in other._properties:
                logger.debug(f"Property with key {k} not in {other._properties.keys()}")
                return False
            if v != other._properties[k]:
                logger.debug(f"Value {v} not equal to {other._properties[k]}")
                return False
        # The loop above only covers keys of `self`; keys present solely in
        # `other` must also count as a difference or equality is asymmetric.
        extra_keys = other._properties.keys() - self._properties.keys()
        if extra_keys:
            logger.debug(
                f"Property keys {sorted(extra_keys, key=str)} not in {self._properties.keys()}"
            )
            return False
        return True
62+
63+
64+
class Edge:
    """
    Directed relationship between two vertex ids, with a label and properties.
    """

    def __init__(
        self, from_vertex: int, to_vertex: int, label: str, properties: Dict[str, Any]
    ):
        self._from_vertex = from_vertex
        self._to_vertex = to_vertex
        self._label = label
        self._properties = properties

    @property
    def from_vertex(self) -> int:
        """Id of the source vertex."""
        return self._from_vertex

    @property
    def to_vertex(self) -> int:
        """Id of the destination vertex."""
        return self._to_vertex

    def __eq__(self, other):
        """
        Compare endpoints, label and properties, logging the first difference.

        Returns NotImplemented for objects that are not an Edge, so Python
        falls back to its default comparison instead of failing.
        """
        if not isinstance(other, Edge):
            return NotImplemented
        logger.debug(
            f"comparing Edge ({self._from_vertex}, {self._to_vertex}) to "
            f"({other._from_vertex}, {other._to_vertex})"
        )
        if self._from_vertex != other._from_vertex:
            logger.debug(
                f"Source vertex is different {self._from_vertex} <> {other._from_vertex}"
            )
            return False
        if self._to_vertex != other._to_vertex:
            logger.debug(
                f"Destination vertex is different {self._to_vertex} <> {other._to_vertex}"
            )
            return False
        if self._label != other._label:
            logger.debug(f"Label is different {self._label} <> {other._label}")
            return False
        for k, v in self._properties.items():
            if k not in other._properties:
                logger.debug(f"Property with key {k} not in {other._properties.keys()}")
                return False
            if v != other._properties[k]:
                logger.debug(f"Value {v} not equal to {other._properties[k]}")
                return False
        # The loop above only covers keys of `self`; keys present solely in
        # `other` must also count as a difference or equality is asymmetric.
        extra_keys = other._properties.keys() - self._properties.keys()
        if extra_keys:
            logger.debug(
                f"Property keys {sorted(extra_keys, key=str)} not in {self._properties.keys()}"
            )
            return False
        return True
111+
112+
113+
class Graph:
    """
    Plain container for the vertices and edges of one exported graph.
    """

    def __init__(self):
        # Both collections start empty and grow through add_vertex / add_edge.
        self._vertices = []
        self._edges = []

    @property
    def vertices(self):
        """The internal vertex list itself (not a copy)."""
        return self._vertices

    @property
    def edges(self):
        """The internal edge list itself (not a copy)."""
        return self._edges

    def add_vertex(self, vertex: "Vertex"):
        """Append ``vertex`` to the graph."""
        self._vertices.append(vertex)

    def add_edge(self, edge: "Edge"):
        """Append ``edge`` to the graph."""
        self._edges.append(edge)
131+
132+
133+
def get_neo4j_data_json(driver) -> List[Dict[str, Any]]:
    """
    Export the whole Neo4j database through APOC and return it as parsed JSON.

    The first row of the result carries the export as newline-separated JSON
    objects. Each line is parsed on its own instead of rewriting the Python
    repr of the row with regexes, which broke on data containing apostrophes
    or backslashes.

    Returns a list with one dict per exported line, or an empty list when the
    query yields no data.
    """
    with driver.session() as session:
        query = neo4j.Query(
            "CALL apoc.export.json.all(null,{useTypes:true, stream:true}) YIELD data RETURN data;"
        )
        result = session.run(query).values()

    # Each row is a list of column values; `data` is the only column.
    # NOTE(review): only the first row is read, as before — confirm APOC never
    # splits a streamed export over several rows for the datasets used here.
    data = result[0][0] if result else None
    if not data:
        return []

    # Split on "\n" only: str.splitlines() would also break on separators that
    # may legally occur inside JSON strings.
    return [json.loads(line) for line in data.split("\n") if line.strip()]
144+
145+
146+
def get_memgraph_data_json_format(memgraph: gqlalchemy.Memgraph):
    """
    Export the Memgraph database via export_util.json_stream as parsed JSON.

    Reads the `stream` column of the first returned row and decodes it with
    json.loads.
    """
    # Plain string: the query has no placeholders, so no f-string is needed.
    query = "CALL export_util.json_stream() YIELD stream RETURN stream;"
    rows = list(memgraph.execute_and_fetch(query))
    return json.loads(rows[0]["stream"])
155+
156+
157+
def extract_vertex_from_json(item) -> Vertex:
    """
    Build a Vertex from one exported node entry.

    The vertex id is taken from the node's `id` property, not from the
    database-internal id.

    Raises:
        AssertionError: if the node has no `id` property (or it is None).
    """
    # NOTE(review): presumably an exporter can omit "properties" / "labels"
    # for nodes that have none — tolerate a missing key instead of KeyError.
    properties = item.get("properties") or {}
    vertex_id = properties.get("id")
    if vertex_id is None:
        # Raised explicitly so the check survives `python -O` and also fires
        # when the key is absent, while keeping the original exception type.
        raise AssertionError("Vertex in JSON doesn't have ID property")
    return Vertex(vertex_id, item.get("labels") or [], properties)
162+
163+
164+
def create_edge_from_data(from_vertex_id: int, to_vertex_id: int, item) -> Edge:
    """
    Build an Edge between two vertex ids from one exported relationship entry.

    The label is read from item["label"]; properties default to an empty dict.
    """
    # NOTE(review): presumably an exporter can omit "properties" for
    # relationships without any — fall back to {} instead of KeyError.
    properties = item.get("properties") or {}
    return Edge(from_vertex_id, to_vertex_id, item["label"], properties)
166+
167+
168+
def create_graph_memgraph_json(json_memgraph_data) -> Graph:
    """
    Build a Graph from the JSON entries exported by Memgraph.

    Entries with type "node" become vertices; every other entry is treated as
    a relationship whose "start" / "end" hold internal node ids. Those ids are
    translated to the `id` property of the node, so a relationship must appear
    after the nodes it connects.

    Vertices are sorted by id and edges by (source, destination, label).
    """
    logger.debug(f"Memgraph JSON data {json_memgraph_data}")
    graph = Graph()
    # Internal database id -> value of the node's `id` property.
    internal_to_property_id = {}
    for item in json_memgraph_data:
        if item["type"] == "node":
            vertex = extract_vertex_from_json(item)
            graph.add_vertex(vertex)
            internal_to_property_id[item["id"]] = vertex.id
        else:
            graph.add_edge(
                create_edge_from_data(
                    internal_to_property_id[item["start"]],
                    internal_to_property_id[item["end"]],
                    item,
                )
            )

    graph.vertices.sort(key=lambda vertex: vertex.id)
    # The label breaks ties between parallel edges; without it their relative
    # order would depend on export order, which may differ between databases.
    graph.edges.sort(
        key=lambda edge: (edge.from_vertex, edge.to_vertex, edge._label)
    )
    return graph
188+
189+
190+
def create_graph_neo4j_json(json_neo4j_data) -> Graph:
    """
    Build a Graph from the JSON entries exported by Neo4j (APOC).

    Entries with type "node" become vertices; every other entry is treated as
    a relationship whose "start" / "end" are objects carrying an internal node
    "id". Those ids are translated to the `id` property of the node, so a
    relationship must appear after the nodes it connects.

    Vertices are sorted by id and edges by (source, destination, label).
    """
    logger.debug(f"Neo4j JSON data {json_neo4j_data}")
    graph = Graph()
    # Internal database id -> value of the node's `id` property.
    internal_to_property_id = {}
    for item in json_neo4j_data:
        if item["type"] == "node":
            vertex = extract_vertex_from_json(item)
            graph.add_vertex(vertex)
            internal_to_property_id[item["id"]] = vertex.id
        else:
            graph.add_edge(
                create_edge_from_data(
                    internal_to_property_id[item["start"]["id"]],
                    internal_to_property_id[item["end"]["id"]],
                    item,
                )
            )

    graph.vertices.sort(key=lambda vertex: vertex.id)
    # The label breaks ties between parallel edges; without it their relative
    # order would depend on export order, which may differ between databases.
    graph.edges.sort(
        key=lambda edge: (edge.from_vertex, edge.to_vertex, edge._label)
    )
    return graph
209+
210+
211+
def create_neo4j_driver(port: int) -> neo4j.BoltDriver:
    """
    Open an unencrypted Bolt driver to Neo4j on localhost at the given port.
    """
    uri = f"bolt://localhost:{port}"
    return neo4j.GraphDatabase.driver(uri, encrypted=False)
213+
214+
215+
def create_memgraph_db(port: int) -> gqlalchemy.Memgraph:
    """
    Create a gqlalchemy Memgraph client for localhost at the given port.
    """
    host = "localhost"
    return gqlalchemy.Memgraph(host, port)
217+
218+
219+
def mg_execute_cyphers(input_cyphers: List[str], db: gqlalchemy.Memgraph):
    """
    Run every Cypher statement of `input_cyphers`, in order, against Memgraph.
    """
    for statement in input_cyphers:
        db.execute(statement)
225+
226+
227+
def neo4j_execute_cyphers(input_cyphers: List[str], neo4j_driver: neo4j.BoltDriver):
    """
    Run every Cypher statement of `input_cyphers`, in order, against Neo4j.

    All statements share one session; each result is consumed and discarded.
    """
    with neo4j_driver.session() as session:
        for statement in input_cyphers:
            session.run(neo4j.Query(statement)).values()
235+
236+
237+
def run_memgraph_query(query: str, db: gqlalchemy.Memgraph):
    """
    Run a single Cypher query against Memgraph; nothing is returned.
    """
    db.execute(query)
242+
243+
244+
def run_neo4j_query(query: str, neo4j_driver: neo4j.BoltDriver):
    """
    Run a single Cypher query against Neo4j in its own session.

    The result is consumed and discarded; nothing is returned.
    """
    with neo4j_driver.session() as session:
        wrapped_query = neo4j.Query(query)
        session.run(wrapped_query).values()
251+
252+
253+
def clean_memgraph_db(memgraph_db: gqlalchemy.Memgraph):
    """
    Empty the Memgraph instance by calling its drop_database().
    """
    memgraph_db.drop_database()
255+
256+
257+
def clean_neo4j_db(neo4j_db: neo4j.BoltDriver):
    """
    Empty Neo4j by detach-deleting every node in a fresh session.
    """
    delete_everything = neo4j.Query("MATCH (n) DETACH DELETE n;")
    with neo4j_db.session() as session:
        session.run(delete_everything).values()
261+
262+
263+
def mg_get_graph(memgraph_db: gqlalchemy.Memgraph) -> Graph:
    """
    Export Memgraph's current content and return it as a Graph.
    """
    logger.debug("Getting data from Memgraph")
    exported = get_memgraph_data_json_format(memgraph_db)

    logger.debug("Building the graph from Memgraph JSON data")
    graph = create_graph_memgraph_json(exported)
    return graph
268+
269+
270+
def neo4j_get_graph(neo4j_driver: neo4j.BoltDriver) -> Graph:
    """
    Export Neo4j's current content and return it as a Graph.
    """
    logger.debug("Getting data from Neo4j")
    exported = get_neo4j_data_json(neo4j_driver)

    logger.debug("Building the graph from Neo4j JSON data")
    graph = create_graph_neo4j_json(exported)
    return graph

0 commit comments

Comments
 (0)