Skip to content

Commit 82dee71

Browse files
kaustubh-darekar authored and kartikpersistent committed
Handled EquivalentSchemaRuleAlreadyExist due to race condition (#949)
1 parent a07b804 commit 82dee71

File tree

2 files changed

+30
-53
lines changed

2 files changed

+30
-53
lines changed

backend/src/main.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,7 @@ async def processing_source(uri, userName, password, database, model, file_name,
338338
logging.info(f'Time taken database connection: {elapsed_create_connection:.2f} seconds')
339339
uri_latency["create_connection"] = f'{elapsed_create_connection:.2f}'
340340
graphDb_data_Access = graphDBdataAccess(graph)
341+
create_chunk_vector_index(graph)
341342
start_get_chunkId_chunkDoc_list = time.time()
342343
total_chunks, chunkId_chunkDoc_list = get_chunkId_chunkDoc_list(graph, file_name, pages, retry_condition)
343344
end_get_chunkId_chunkDoc_list = time.time()
@@ -482,7 +483,7 @@ async def processing_chunks(chunkId_chunkDoc_list,graph,uri, userName, password,
482483
graph = create_graph_database_connection(uri, userName, password, database)
483484

484485
start_update_embedding = time.time()
485-
update_embedding_create_vector_index( graph, chunkId_chunkDoc_list, file_name)
486+
create_chunk_embeddings( graph, chunkId_chunkDoc_list, file_name)
486487
end_update_embedding = time.time()
487488
elapsed_update_embedding = end_update_embedding - start_update_embedding
488489
logging.info(f'Time taken to update embedding in chunk node: {elapsed_update_embedding:.2f} seconds')

backend/src/make_relationships.py

Lines changed: 28 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
import os
1010
import uuid
1111
import hashlib
12+
import time
13+
from langchain_neo4j import Neo4jVector
1214

1315
logging.basicConfig(format='%(asctime)s - %(message)s',level='INFO')
1416

@@ -41,28 +43,8 @@ def merge_relationship_between_chunk_and_entites(graph: Neo4jGraph, graph_docume
4143
"""
4244
graph.query(unwind_query, params={"batch_data": batch_data})
4345

44-
graph.query('MATCH(c:Chunk {'+chunk_node_id_set.format(graph_doc_chunk_id['chunk_id'])+'}) MERGE (n:'+ node.type +'{ id: "'+node_id+'"}) MERGE (c)-[:HAS_ENTITY]->(n)')
45-
46-
def load_embedding_model(embedding_model_name: str):
47-
if embedding_model_name == "openai":
48-
embeddings = OpenAIEmbeddings()
49-
dimension = 1536
50-
logging.info("Embedding: Using OpenAI")
51-
elif embedding_model_name == "vertexai":
52-
embeddings = VertexAIEmbeddings(
53-
model="textembedding-gecko@003"
54-
)
55-
dimension = 768
56-
logging.info("Embedding: Using Vertex AI Embeddings")
57-
else:
58-
embeddings = SentenceTransformerEmbeddings(
59-
model_name="all-MiniLM-L6-v2"#, cache_folder="/embedding_model"
60-
)
61-
dimension = 384
62-
logging.info("Embedding: Using SentenceTransformer")
63-
return embeddings, dimension
64-
65-
def update_embedding_create_vector_index(graph, chunkId_chunkDoc_list, file_name):
46+
47+
def create_chunk_embeddings(graph, chunkId_chunkDoc_list, file_name):
6648
#create embedding
6749
isEmbedding = os.getenv('IS_EMBEDDING')
6850
# embedding_model = os.getenv('EMBEDDING_MODEL')
@@ -79,35 +61,6 @@ def update_embedding_create_vector_index(graph, chunkId_chunkDoc_list, file_name
7961
"chunkId": row['chunk_id'],
8062
"embeddings": embeddings_arr
8163
})
82-
# graph.query("""MATCH (d:Document {fileName : $fileName})
83-
# MERGE (c:Chunk {id:$chunkId}) SET c.embedding = $embeddings
84-
# MERGE (c)-[:PART_OF]->(d)
85-
# """,
86-
# {
87-
# "fileName" : file_name,
88-
# "chunkId": row['chunk_id'],
89-
# "embeddings" : embeddings_arr
90-
# }
91-
# )
92-
logging.info('create vector index on chunk embedding')
93-
# result = graph.query("SHOW INDEXES YIELD * WHERE labelsOrTypes = ['Chunk'] and name = 'vector'")
94-
vector_index = graph.query("SHOW INDEXES YIELD * WHERE labelsOrTypes = ['Chunk'] and type = 'VECTOR' AND name = 'vector' return options")
95-
# if result:
96-
# logging.info(f"vector index dropped for 'Chunk'")
97-
# graph.query("DROP INDEX vector IF EXISTS;")
98-
99-
if len(vector_index) == 0:
100-
logging.info(f'vector index is not exist, will create in next query')
101-
graph.query("""CREATE VECTOR INDEX `vector` if not exists for (c:Chunk) on (c.embedding)
102-
OPTIONS {indexConfig: {
103-
`vector.dimensions`: $dimensions,
104-
`vector.similarity_function`: 'cosine'
105-
}}
106-
""",
107-
{
108-
"dimensions" : dimension
109-
}
110-
)
11164

11265
query_to_create_embedding = """
11366
UNWIND $data AS row
@@ -214,4 +167,27 @@ def create_relation_between_chunks(graph, file_name, chunks: List[Document])->li
214167
"""
215168
graph.query(query_to_create_NEXT_CHUNK_relation, params={"relationships": relationships})
216169

217-
return lst_chunks_including_hash
170+
return lst_chunks_including_hash
171+
172+
173+
def create_chunk_vector_index(graph):
174+
start_time = time.time()
175+
try:
176+
vector_index = graph.query("SHOW INDEXES YIELD * WHERE labelsOrTypes = ['Chunk'] and type = 'VECTOR' AND name = 'vector' return options")
177+
178+
if not vector_index:
179+
vector_store = Neo4jVector(embedding=EMBEDDING_FUNCTION,
180+
graph=graph,
181+
node_label="Chunk",
182+
embedding_node_property="embedding",
183+
index_name="vector_index"
184+
)
185+
vector_store.create_new_index()
186+
logging.info(f"Index created successfully. Time taken: {time.time() - start_time:.2f} seconds")
187+
else:
188+
logging.info(f"Index already exist,Skipping creation. Time taken: {time.time() - start_time:.2f} seconds")
189+
except Exception as e:
190+
if "EquivalentSchemaRuleAlreadyExists" in str(e):
191+
logging.info("Vector index already exists, skipping creation.")
192+
else:
193+
raise

0 commit comments

Comments
 (0)