@@ -127,7 +127,6 @@ def __init__(
127
127
embedding : EmbeddingModel ,
128
128
* ,
129
129
node_table : str = "graph_nodes" ,
130
- targets_table : str = "graph_targets" ,
131
130
session : Optional [Session ] = None ,
132
131
keyspace : Optional [str ] = None ,
133
132
setup_mode : SetupMode = SetupMode .SYNC ,
@@ -141,12 +140,8 @@ def __init__(
141
140
if not _CQL_IDENTIFIER_PATTERN .fullmatch (node_table ):
142
141
raise ValueError (f"Invalid node table name: { node_table } " )
143
142
144
- if not _CQL_IDENTIFIER_PATTERN .fullmatch (targets_table ):
145
- raise ValueError (f"Invalid node table name: { targets_table } " )
146
-
147
143
self ._embedding = embedding
148
144
self ._node_table = node_table
149
- self ._targets_table = targets_table
150
145
self ._session = session
151
146
self ._keyspace = keyspace
152
147
@@ -163,16 +158,8 @@ def __init__(
163
158
f"""
164
159
INSERT INTO { keyspace } .{ node_table } (
165
160
content_id, kind, text_content, text_embedding, link_to_tags,
166
- metadata_blob, links_blob
167
- ) VALUES (?, '{ Kind .passage } ', ?, ?, ?, ?, ?)
168
- """ # noqa: S608
169
- )
170
-
171
- self ._insert_tag = session .prepare (
172
- f"""
173
- INSERT INTO { keyspace } .{ targets_table } (
174
- target_content_id, kind, tag, target_text_embedding, target_link_to_tags
175
- ) VALUES (?, ?, ?, ?, ?)
161
+ link_from_tags, metadata_blob, links_blob
162
+ ) VALUES (?, '{ Kind .passage } ', ?, ?, ?, ?, ?, ?)
176
163
""" # noqa: S608
177
164
)
178
165
@@ -236,56 +223,44 @@ def __init__(
236
223
237
224
self ._query_targets_embeddings_by_kind_and_tag_and_embedding = session .prepare (
238
225
f"""
239
- SELECT target_content_id, target_text_embedding, tag, target_link_to_tags
240
- FROM { keyspace } .{ targets_table }
241
- WHERE kind = ? AND tag = ?
242
- ORDER BY target_text_embedding ANN of ?
226
+ SELECT
227
+ content_id AS target_content_id,
228
+ text_embedding AS target_text_embedding,
229
+ link_to_tags AS target_link_to_tags
230
+ FROM { keyspace } .{ node_table }
231
+ WHERE link_from_tags CONTAINS (?, ?)
232
+ ORDER BY text_embedding ANN of ?
243
233
LIMIT ?
244
- """ # noqa: S608
234
+ """
245
235
)
246
236
247
237
self ._query_targets_by_kind_and_value = session .prepare (
248
238
f"""
249
- SELECT target_content_id, kind, tag
250
- FROM { keyspace } .{ targets_table }
251
- WHERE kind = ? AND tag = ?
252
- """ # noqa: S608
239
+ SELECT
240
+ content_id AS target_content_id
241
+ FROM { keyspace } .{ node_table }
242
+ WHERE link_from_tags CONTAINS (?, ?)
243
+ """
253
244
)
254
245
255
246
def _apply_schema (self ) -> None :
256
247
"""Apply the schema to the database."""
257
248
embedding_dim = len (self ._embedding .embed_query ("Test Query" ))
258
- self ._session .execute (
259
- f""" CREATE TABLE IF NOT EXISTS { self ._keyspace } .{ self ._node_table } (
249
+ self ._session .execute (f"""
250
+ CREATE TABLE IF NOT EXISTS { self ._keyspace } .{ self ._node_table } (
260
251
content_id TEXT,
261
252
kind TEXT,
262
253
text_content TEXT,
263
254
text_embedding VECTOR<FLOAT, { embedding_dim } >,
264
255
265
256
link_to_tags SET<TUPLE<TEXT, TEXT>>,
257
+ link_from_tags SET<TUPLE<TEXT, TEXT>>,
266
258
metadata_blob TEXT,
267
259
links_blob TEXT,
268
260
269
261
PRIMARY KEY (content_id)
270
262
)
271
- """
272
- )
273
-
274
- self ._session .execute (
275
- f"""CREATE TABLE IF NOT EXISTS { self ._keyspace } .{ self ._targets_table } (
276
- target_content_id TEXT,
277
- kind TEXT,
278
- tag TEXT,
279
-
280
- -- text_embedding of target node. allows MMR to be applied without
281
- -- fetching nodes.
282
- target_text_embedding VECTOR<FLOAT, { embedding_dim } >,
283
- target_link_to_tags SET<TUPLE<TEXT, TEXT>>,
284
-
285
- PRIMARY KEY ((kind, tag), target_content_id)
286
- )
287
- """
288
- )
263
+ """ )
289
264
290
265
# Index on text_embedding (for similarity search)
291
266
self ._session .execute (f"""
@@ -294,12 +269,11 @@ def _apply_schema(self) -> None:
294
269
USING 'StorageAttachedIndex';
295
270
""" )
296
271
297
- # Index on target_text_embedding (for similarity search)
298
272
self ._session .execute (f"""
299
- CREATE CUSTOM INDEX IF NOT EXISTS { self ._targets_table } _target_text_embedding_index
300
- ON { self ._keyspace } .{ self ._targets_table } (target_text_embedding )
273
+ CREATE CUSTOM INDEX IF NOT EXISTS { self ._node_table } _link_from_tags
274
+ ON { self ._keyspace } .{ self ._node_table } (link_from_tags )
301
275
USING 'StorageAttachedIndex';
302
- """ ) # noqa: E501
276
+ """ )
303
277
304
278
def _concurrent_queries (self ) -> ConcurrentQueries :
305
279
return ConcurrentQueries (self ._session )
@@ -348,17 +322,12 @@ def add_nodes(
348
322
text ,
349
323
text_embedding ,
350
324
link_to_tags ,
325
+ link_from_tags ,
351
326
metadata_blob ,
352
327
links_blob ,
353
328
),
354
329
)
355
330
356
- for kind , value in link_from_tags :
357
- cq .execute (
358
- self ._insert_tag ,
359
- parameters = (node_id , kind , value , text_embedding , link_to_tags ),
360
- )
361
-
362
331
return node_ids
363
332
364
333
def _nodes_with_ids (
0 commit comments