Skip to content

Commit fb04d59

Browse files
authored
smart graph support (#61)
* smart graph support | initial commit * cleanup: `_create_node_attr_dict` * new: `cast_to_string` * cleanup: overrides * fix: lint * fix: `overwrite_graph` * new: `GraphNotEmpty` exception * lock deps * remove: `_get_smart_id` * new: `test_load_graph_from_nxadb_as_smart_graph` * new: `add_nodes_from_override` * fix: typo * fix: lint * fix: pyproject * add comment * `overwrite_graph` docstring * update `adbnx-adapter` version * fix: var name * fix: `GraphNotEmpty` logic * fix: whitespace * fix: drop instead of truncate * Revert "fix: drop instead of truncate" This reverts commit 11347c9. * add `overwrite_graph` coverage * fix: drop graph instead of truncate * fix: docstring * fix: `name` docstring
1 parent 7999151 commit fb04d59

File tree

9 files changed

+423
-157
lines changed

9 files changed

+423
-157
lines changed

nx_arangodb/classes/dict/node.py

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,8 @@ def update(self, attrs: Any) -> None:
221221
if not attrs:
222222
return
223223

224-
self.data.update(build_node_attr_dict_data(self, attrs))
224+
node_attr_dict_data = build_node_attr_dict_data(self, attrs)
225+
self.data.update(node_attr_dict_data)
225226

226227
if not self.node_id:
227228
logger.debug("Node ID not set, skipping NodeAttrDict(?).update()")
@@ -275,10 +276,12 @@ def __init__(
275276
self.FETCHED_ALL_DATA = False
276277
self.FETCHED_ALL_IDS = False
277278

278-
def _create_node_attr_dict(self, vertex: dict[str, Any]) -> NodeAttrDict:
279+
def _create_node_attr_dict(
280+
self, node_id: str, node_data: dict[str, Any]
281+
) -> NodeAttrDict:
279282
node_attr_dict = self.node_attr_dict_factory()
280-
node_attr_dict.node_id = vertex["_id"]
281-
node_attr_dict.data = build_node_attr_dict_data(node_attr_dict, vertex)
283+
node_attr_dict.node_id = node_id
284+
node_attr_dict.data = build_node_attr_dict_data(node_attr_dict, node_data)
282285

283286
return node_attr_dict
284287

@@ -321,8 +324,8 @@ def __getitem__(self, key: str) -> NodeAttrDict:
321324
if node_id not in self.data and self.FETCHED_ALL_IDS:
322325
raise KeyError(key)
323326

324-
if vertex_db := vertex_get(self.graph, node_id):
325-
node_attr_dict = self._create_node_attr_dict(vertex_db)
327+
if node := vertex_get(self.graph, node_id):
328+
node_attr_dict = self._create_node_attr_dict(node["_id"], node)
326329
self.data[node_id] = node_attr_dict
327330

328331
return node_attr_dict
@@ -331,18 +334,16 @@ def __getitem__(self, key: str) -> NodeAttrDict:
331334

332335
@key_is_string
333336
def __setitem__(self, key: str, value: NodeAttrDict) -> None:
334-
"""G._node['node/1'] = {'foo': 'bar'}
335-
336-
Not to be confused with:
337-
- G.add_node('node/1', foo='bar')
338-
"""
337+
"""G._node['node/1'] = {'foo': 'bar'}"""
339338
assert isinstance(value, NodeAttrDict)
340339

341340
node_type, node_id = get_node_type_and_id(key, self.default_node_type)
342341

343342
result = doc_insert(self.db, node_type, node_id, value.data)
344343

345-
node_attr_dict = self._create_node_attr_dict(result)
344+
node_attr_dict = self._create_node_attr_dict(
345+
result["_id"], {**value.data, **result}
346+
)
346347

347348
self.data[node_id] = node_attr_dict
348349

@@ -405,10 +406,7 @@ def copy(self) -> Any:
405406
@keys_are_strings
406407
def __update_local_nodes(self, nodes: Any) -> None:
407408
for node_id, node_data in nodes.items():
408-
node_attr_dict = self.node_attr_dict_factory()
409-
node_attr_dict.node_id = node_id
410-
node_attr_dict.data = build_node_attr_dict_data(node_attr_dict, node_data)
411-
409+
node_attr_dict = self._create_node_attr_dict(node_id, node_data)
412410
self.data[node_id] = node_attr_dict
413411

414412
@keys_are_strings
@@ -478,7 +476,7 @@ def _fetch_all(self):
478476

479477
for node_id, node_data in node_dict.items():
480478
del node_data["_rev"] # TODO: Optimize away via phenolrs
481-
node_attr_dict = self._create_node_attr_dict(node_data)
479+
node_attr_dict = self._create_node_attr_dict(node_data["_id"], node_data)
482480
self.data[node_id] = node_attr_dict
483481

484482
self.FETCHED_ALL_DATA = True

nx_arangodb/classes/digraph.py

Lines changed: 65 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,8 @@ class DiGraph(Graph, nx.DiGraph):
6060
name : str (optional, default: None)
6161
Name of the graph in the database. If the graph already exists,
6262
the user can pass the name of the graph to connect to it. If
63-
the graph does not exist, the user can create a new graph by
64-
passing the name. NOTE: Must be used in conjunction with
63+
the graph does not exist, a General Graph will be created by
64+
passing the **name**. NOTE: Must be used in conjunction with
6565
**incoming_graph_data** if the user wants to persist the graph
6666
in ArangoDB.
6767
@@ -125,6 +125,12 @@ class DiGraph(Graph, nx.DiGraph):
125125
whenever possible. NOTE: This feature is experimental and may not work
126126
as expected.
127127
128+
overwrite_graph : bool (optional, default: False)
129+
Whether to overwrite the graph in the database if it already exists. If
130+
set to True, the graph collections will be dropped and recreated. Note that
131+
this operation is irreversible and will result in the loss of all data in
132+
the graph. NOTE: If set to True, Collection Indexes will also be lost.
133+
128134
args: positional arguments for nx.Graph
129135
Additional arguments passed to nx.Graph.
130136
@@ -154,6 +160,7 @@ def __init__(
154160
write_async: bool = True,
155161
symmetrize_edges: bool = False,
156162
use_arango_views: bool = False,
163+
overwrite_graph: bool = False,
157164
*args: Any,
158165
**kwargs: Any,
159166
):
@@ -171,13 +178,15 @@ def __init__(
171178
write_async,
172179
symmetrize_edges,
173180
use_arango_views,
181+
overwrite_graph,
174182
*args,
175183
**kwargs,
176184
)
177185

178186
if self.graph_exists_in_db:
179187
self.clear_edges = self.clear_edges_override
180188
self.add_node = self.add_node_override
189+
self.add_nodes_from = self.add_nodes_from_override
181190
self.remove_node = self.remove_node_override
182191
self.reverse = self.reverse_override
183192

@@ -194,6 +203,7 @@ def __init__(
194203
and not self._loaded_incoming_graph_data
195204
):
196205
nx.convert.to_networkx_graph(incoming_graph_data, create_using=self)
206+
self._loaded_incoming_graph_data = True
197207

198208
#######################
199209
# nx.DiGraph Overides #
@@ -225,9 +235,10 @@ def clear_edges_override(self):
225235
super().clear_edges()
226236

227237
def add_node_override(self, node_for_adding, **attr):
238+
if node_for_adding is None:
239+
raise ValueError("None cannot be a node")
240+
228241
if node_for_adding not in self._succ:
229-
if node_for_adding is None:
230-
raise ValueError("None cannot be a node")
231242

232243
self._succ[node_for_adding] = self.adjlist_inner_dict_factory()
233244
self._pred[node_for_adding] = self.adjlist_inner_dict_factory()
@@ -241,12 +252,15 @@ def add_node_override(self, node_for_adding, **attr):
241252
# attr_dict.update(attr)
242253

243254
# New:
244-
self._node[node_for_adding] = self.node_attr_dict_factory()
245-
self._node[node_for_adding].update(attr)
255+
node_attr_dict = self.node_attr_dict_factory()
256+
node_attr_dict.data = attr
257+
self._node[node_for_adding] = node_attr_dict
246258

247259
# Reason:
248-
# Invoking `update` on the `attr_dict` without `attr_dict.node_id` being set
249-
# i.e trying to update a node's attributes before we know _which_ node it is
260+
# We can optimize the process of adding a node by avoiding
261+
# the creation of a new dictionary and updating it with the attributes.
262+
# Instead, we can create a new node_attr_dict object and set the attributes
263+
# directly. This only makes 1 network call to the database instead of 2.
250264

251265
###########################
252266

@@ -255,6 +269,49 @@ def add_node_override(self, node_for_adding, **attr):
255269

256270
nx._clear_cache(self)
257271

272+
def add_nodes_from_override(self, nodes_for_adding, **attr):
273+
for n in nodes_for_adding:
274+
try:
275+
newnode = n not in self._node
276+
newdict = attr
277+
except TypeError:
278+
n, ndict = n
279+
newnode = n not in self._node
280+
newdict = attr.copy()
281+
newdict.update(ndict)
282+
if newnode:
283+
if n is None:
284+
raise ValueError("None cannot be a node")
285+
self._succ[n] = self.adjlist_inner_dict_factory()
286+
self._pred[n] = self.adjlist_inner_dict_factory()
287+
288+
######################
289+
# NOTE: monkey patch #
290+
######################
291+
292+
# Old:
293+
# self._node[n] = self.node_attr_dict_factory()
294+
#
295+
# self._node[n].update(newdict)
296+
297+
# New:
298+
node_attr_dict = self.node_attr_dict_factory()
299+
node_attr_dict.data = newdict
300+
self._node[n] = node_attr_dict
301+
302+
else:
303+
self._node[n].update(newdict)
304+
305+
# Reason:
306+
# We can optimize the process of adding a node by avoiding
307+
# the creation of a new dictionary and updating it with the attributes.
308+
# Instead, we create a new node_attr_dict object and set the attributes
309+
# directly. This only makes 1 network call to the database instead of 2.
310+
311+
###########################
312+
313+
nx._clear_cache(self)
314+
258315
def remove_node_override(self, n):
259316
if isinstance(n, (str, int)):
260317
n = get_node_id(str(n), self.default_node_type)

nx_arangodb/classes/function.py

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,17 @@ def to_dict(self):
199199
return cls
200200

201201

202+
def cast_to_string(value: Any) -> str:
203+
"""Casts a value to a string."""
204+
if isinstance(value, str):
205+
return value
206+
207+
if isinstance(value, (int, float)):
208+
return str(value)
209+
210+
raise TypeError(f"{value} cannot be casted to string.")
211+
212+
202213
def key_is_string(func: Callable[..., Any]) -> Any:
203214
"""Decorator to check if the key is a string.
204215
Will attempt to cast the key to a string if it is not.
@@ -208,12 +219,7 @@ def wrapper(self: Any, key: Any, *args: Any, **kwargs: Any) -> Any:
208219
if key is None:
209220
raise ValueError("Key cannot be None.")
210221

211-
if not isinstance(key, str):
212-
if not isinstance(key, (int, float)):
213-
raise TypeError(f"{key} cannot be casted to string.")
214-
215-
key = str(key)
216-
222+
key = cast_to_string(key)
217223
return func(self, key, *args, **kwargs)
218224

219225
return wrapper
@@ -270,12 +276,7 @@ def wrapper(self: Any, data: Any, *args: Any, **kwargs: Any) -> Any:
270276
raise TypeError(f"Decorator found unsupported type: {type(data)}.")
271277

272278
for key, value in items:
273-
if not isinstance(key, str):
274-
if not isinstance(key, (int, float)):
275-
raise TypeError(f"{key} cannot be casted to string.")
276-
277-
key = str(key)
278-
279+
key = cast_to_string(key)
279280
data_dict[key] = value
280281

281282
return func(self, data_dict, *args, **kwargs)
@@ -655,7 +656,7 @@ def doc_insert(
655656
data: dict[str, Any] = {},
656657
**kwargs: Any,
657658
) -> dict[str, Any]:
658-
"""Inserts a document into a collection."""
659+
"""Inserts a document into a collection. Returns document metadata."""
659660
result: dict[str, Any] = db.insert_document(
660661
collection, {**data, "_id": id}, overwrite=True, **kwargs
661662
)

0 commit comments

Comments
 (0)