Skip to content

Commit c02ea60

Browse files
authored
Merge pull request #356 from cnoe-io/rag-improvements
fix(rag): webui nginx; logging; prompt improvements
2 parents abf7765 + a71d65f commit c02ea60

File tree

18 files changed

+331
-235
lines changed

18 files changed

+331
-235
lines changed

.gitleaksignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
/github/workspace/ai_platform_engineering/knowledge_bases/rag/common/src/common/graph_db/neo4j/graph_db.py:generic-api-key:48
2+
/github/workspace/ai_platform_engineering/knowledge_bases/rag/common/src/common/graph_db/neo4j/graph_db.py:generic-api-key:49

ai_platform_engineering/knowledge_bases/rag/agent_ontology/src/agent_ontology/agent.py

Lines changed: 74 additions & 77 deletions
Large diffs are not rendered by default.

ai_platform_engineering/knowledge_bases/rag/agent_ontology/src/agent_ontology/relation_manager.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from common.models.graph import Entity, Relation
1010
from common.graph_db.base import GraphDB
1111

12+
CLIENT_NAME="relation_manager"
1213

1314
class RelationCandidateManager:
1415
"""
@@ -29,12 +30,13 @@ def __init__(self, graph_db: GraphDB, ontology_graph_db: GraphDB, acceptance_thr
2930

3031
async def cleanup(self):
    """
    Deletes relation candidates and applied relations left over from other heuristics versions.

    Runs three queries, in order:
      1. On the ontology graph: delete every relation whose heuristics_version_id differs
         from the current heuristics version.
      2. On the ontology graph: detach-delete every node whose heuristics_version_id differs
         from the current heuristics version.
      3. Via self.data_graph_db: delete every relation that was last updated by this client
         (CLIENT_NAME) under a different heuristics version.

    TODO: Move to the GraphDB class
    """
    self.logger.info("Cleaning up relation candidates from the database")
    version_id = self.heuristics_version_id
    await self.ontology_graph_db.raw_query(f"MATCH ()-[r]->() WHERE r.heuristics_version_id <> '{version_id}' DELETE r")
    await self.ontology_graph_db.raw_query(f"MATCH (n) WHERE n.heuristics_version_id <> '{version_id}' DETACH DELETE n")
    # The client name must be a quoted string literal: unquoted, Cypher parses it as a
    # reference to an undefined variable and the query fails instead of filtering.
    await self.data_graph_db.raw_query(f"MATCH ()-[r]-() WHERE r.{constants.UPDATED_BY_KEY}='{CLIENT_NAME}' AND r.{constants.HEURISTICS_VERSION_ID_KEY} <> '{version_id}' DELETE r")
3840

3941

4042
async def _set_heuristic(self, relation_id: str, candidate: RelationCandidate, recreate: bool = False):
@@ -52,15 +54,15 @@ async def _set_heuristic(self, relation_id: str, candidate: RelationCandidate, r
5254
constants.ENTITY_TYPE_NAME_KEY: candidate.heuristic.entity_a_type,
5355
constants.HEURISTICS_VERSION_ID_KEY: self.heuristics_version_id
5456
})
55-
await self.ontology_graph_db.update_entity(candidate.heuristic.entity_a_type, [entity_a], fresh_until=utils.get_default_fresh_until(), client_name="relation_manager")
57+
await self.ontology_graph_db.update_entity(candidate.heuristic.entity_a_type, [entity_a], fresh_until=utils.get_default_fresh_until(), client_name=CLIENT_NAME)
5658
entity_b = Entity(
5759
primary_key_properties=[constants.ENTITY_TYPE_NAME_KEY, constants.HEURISTICS_VERSION_ID_KEY],
5860
entity_type=candidate.heuristic.entity_b_type,
5961
all_properties={
6062
constants.ENTITY_TYPE_NAME_KEY: candidate.heuristic.entity_b_type,
6163
constants.HEURISTICS_VERSION_ID_KEY: self.heuristics_version_id
6264
})
63-
await self.ontology_graph_db.update_entity(candidate.heuristic.entity_b_type, [entity_b], fresh_until=utils.get_default_fresh_until(), client_name="relation_manager")
65+
await self.ontology_graph_db.update_entity(candidate.heuristic.entity_b_type, [entity_b], fresh_until=utils.get_default_fresh_until(), client_name=CLIENT_NAME)
6466

6567
# Use the evaluation relation name if available (and accepted)
6668
relation_name = constants.PLACEHOLDER_RELATION_NAME
@@ -110,7 +112,6 @@ async def _set_heuristic(self, relation_id: str, candidate: RelationCandidate, r
110112
"evaluation_last_evaluated": candidate.evaluation.last_evaluated if candidate.evaluation else 0,
111113
"evaluation_entity_a_property_values": utils.json_encode(candidate.evaluation.entity_a_property_values) if candidate.evaluation else None,
112114
"evaluation_entity_a_property_counts": utils.json_encode(candidate.evaluation.entity_a_property_counts) if candidate.evaluation else None,
113-
"evaluation_last_evaluation_count": candidate.evaluation.last_evaluation_count if candidate.evaluation else 0,
114115

115116

116117
"is_applied": candidate.is_applied,
@@ -119,7 +120,7 @@ async def _set_heuristic(self, relation_id: str, candidate: RelationCandidate, r
119120
}
120121
),
121122
fresh_until=utils.get_default_fresh_until(),
122-
client_name="relation_manager"
123+
client_name=CLIENT_NAME
123124
)
124125

125126

@@ -155,7 +156,6 @@ async def parse_relation_candidate(self, relation_properties: dict[str, Any]) ->
155156
"last_evaluated": relation_properties.get("evaluation_last_evaluated", 0),
156157
"entity_a_property_values": json.loads(relation_properties.get("evaluation_entity_a_property_values", "{}")),
157158
"entity_a_property_counts": json.loads(relation_properties.get("evaluation_entity_a_property_counts", "{}")),
158-
"last_evaluation_count": relation_properties.get("evaluation_last_evaluation_count", 0)
159159
}
160160
evaluation = FkeyEvaluation.model_validate(evaluation_data)
161161

@@ -310,7 +310,7 @@ async def update_evaluation(self, relation_id: str,
310310
thought: str,
311311
entity_a_property_values: dict[str, List[str]],
312312
entity_a_property_counts: dict[str, int],
313-
evaluation_count: int):
313+
evaluation_heuristic_count: int):
314314
"""
315315
Updates the evaluation for the given relation_id.
316316
:param relation_id: The ID of the relation to update.
@@ -320,7 +320,7 @@ async def update_evaluation(self, relation_id: str,
320320
:param thought: The agent's thoughts about the relation.
321321
:param entity_a_property_values: The values of the properties of entity_a that were used to evaluate the relation.
322322
:param entity_a_property_counts: The counts of the properties of entity_a that were used to evaluate the relation.
323-
:param evaluation_count: The count of the evaluation.
323+
:param evaluation_heuristic_count: The count in heuristics when evaluating.
324324
"""
325325
self.logger.debug(f"Updating evaluation for {relation_id}")
326326
# Acquire a lock for the relation_id to avoid concurrent updates to the same heuristic
@@ -336,7 +336,7 @@ async def update_evaluation(self, relation_id: str,
336336
entity_a_property_values=entity_a_property_values,
337337
entity_a_property_counts=entity_a_property_counts,
338338
last_evaluated=int(time.time()),
339-
last_evaluation_count=evaluation_count
339+
evaluation_heuristic_count=evaluation_heuristic_count
340340
)
341341

342342
await self._set_heuristic(relation_id, candidate, recreate=True)
@@ -373,7 +373,7 @@ async def apply_relation(self, client_name: str, relation_id: str, manual: bool=
373373
if candidate.evaluation is None:
374374
self.logger.warning(f"Relation {relation_id} has no evaluation, cannot apply relation.")
375375
return
376-
self.logger.info(f"Applying relation {relation_id}, {candidate.model_dump_json()}")
376+
self.logger.debug(f"Applying relation {relation_id}, {candidate.model_dump_json()}")
377377
if candidate.evaluation.relation_name is None or candidate.evaluation.relation_name == "":
378378
self.logger.error(f"Relation {relation_id} has no relation name, cannot apply.")
379379
return
@@ -422,7 +422,7 @@ async def unapply_relation(self, relation_id: str, manual: bool=False):
422422
if candidate.evaluation is None:
423423
self.logger.warning(f"Relation {relation_id} has no evaluation, cannot unapply relation.")
424424
return
425-
self.logger.info(f"Unapplying relation {relation_id}, {candidate.model_dump_json()}")
425+
self.logger.debug(f"Unapplying relation {relation_id}, {candidate.model_dump_json()}")
426426

427427
if candidate.evaluation.relation_name is None or candidate.evaluation.relation_name == "":
428428
self.logger.error(f"Relation {relation_id} has no relation name, cannot undo.")

ai_platform_engineering/knowledge_bases/rag/agent_ontology/src/agent_ontology/restapi.py

Lines changed: 32 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -13,57 +13,60 @@
1313
import uvicorn
1414
import redis.asyncio as redis
1515

16-
port = int(os.getenv("SERVER_PORT", 8098))
17-
1816
# Load environment variables from .env file
1917
dotenv.load_dotenv()
2018

21-
logging = utils.get_logger("rest-server")
19+
logger = utils.get_logger("restapi")
2220

21+
port = int(os.getenv("SERVER_PORT", 8098))
2322
SYNC_INTERVAL = int(os.getenv('SYNC_INTERVAL', 21600)) # 6 hours by default
2423
ACCEPTANCE_THRESHOLD = float(os.getenv('ACCEPTANCE_THRESHOLD', float(0.75))) # > 75% by default
2524
REJECTION_THRESHOLD = float(os.getenv('REJECTION_THRESHOLD', float(0.3))) # < 30% by default
2625
MIN_COUNT_FOR_EVAL = int(os.getenv('MIN_COUNT_FOR_EVAL', int(3))) # 3 by default
27-
PERCENT_CHANGE_FOR_EVAL = float(os.getenv('PERCENT_CHANGE_FOR_EVAL', float(0.1))) # 10% by default
28-
MAX_CONCURRENT_PROCESSING = int(os.getenv('MAX_CONCURRENT_PROCESSING', int(30))) # 30 by default
29-
MAX_CONCURRENT_EVALUATION = int(os.getenv('MAX_CONCURRENT_EVALUATION', int(5))) # 5 by default
26+
COUNT_CHANGE_THRESHOLD_RATIO = float(os.getenv('COUNT_CHANGE_THRESHOLD_RATIO', float(0.1))) # 10% by default
27+
MAX_CONCURRENT_PROCESSING = int(os.getenv('MAX_CONCURRENT_PROCESSING', int(40))) # 40 by default
28+
MAX_CONCURRENT_EVALUATION = int(os.getenv('MAX_CONCURRENT_EVALUATION', int(10))) # 10 by default
29+
30+
GRAPH_DB_CLIENT_NAME="web_manual"
3031

3132
scheduler = AsyncIOScheduler()
3233

3334
# Initialize dependencies
34-
logging.info("Initializing data graph database...")
35+
logger.info("Initializing data graph database...")
3536
graph_db: GraphDB = Neo4jDB()
3637

37-
logging.info("Initializing ontology graph database...")
38+
logger.info("Initializing ontology graph database...")
3839
ontology_graph_db: GraphDB = Neo4jDB(uri=os.getenv("NEO4J_ONTOLOGY_ADDR", "bolt://localhost:7688"))
3940

40-
logging.info("Initializing key-value store...")
41+
logger.info("Initializing key-value store...")
4142
redis_client = redis.from_url(os.getenv("REDIS_URL", "redis://localhost:6379"))
4243

43-
logging.info("Initializing ontology agent...")
44+
logger.info("Initializing ontology agent...")
45+
logger.info("Config:\nAcceptance threshold: %s\nRejection threshold: %s\nMax concurrent processing: %s\nMax concurrent evaluation: %s\nCount change threshold ratio: %s\nMin count for eval: %s",
46+
ACCEPTANCE_THRESHOLD, REJECTION_THRESHOLD, MAX_CONCURRENT_PROCESSING, MAX_CONCURRENT_EVALUATION, COUNT_CHANGE_THRESHOLD_RATIO, MIN_COUNT_FOR_EVAL)
4447
agent: OntologyAgent = OntologyAgent(graph_db=graph_db,
4548
ontology_graph_db=ontology_graph_db,
4649
redis=redis_client,
4750
acceptance_threshold=ACCEPTANCE_THRESHOLD,
4851
rejection_threshold=REJECTION_THRESHOLD,
4952
min_count_for_eval=MIN_COUNT_FOR_EVAL,
50-
percent_change_for_eval=PERCENT_CHANGE_FOR_EVAL,
53+
count_change_threshold_ratio=COUNT_CHANGE_THRESHOLD_RATIO,
5154
max_concurrent_processing=MAX_CONCURRENT_PROCESSING,
5255
max_concurrent_evaluation=MAX_CONCURRENT_EVALUATION,
5356
)
5457

5558

5659
@asynccontextmanager
5760
async def lifespan(_: FastAPI):
58-
logging.info("Setting up key-value store with heuristics version")
61+
logger.info("Setting up key-value store with heuristics version")
5962

6063
# Fetch latest heuristics version
6164
heuristics_version_id = await redis_client.get(constants.KV_HEURISTICS_VERSION_ID_KEY)
6265
if heuristics_version_id is None: # if no heuristics version is found, create one
6366
heuristics_version_id = utils.get_uuid()
6467
await redis_client.set(constants.KV_HEURISTICS_VERSION_ID_KEY, heuristics_version_id)
6568

66-
logging.info("Running the ontology agent periodically every %s seconds ...", SYNC_INTERVAL)
69+
logger.info("Running the ontology agent periodically every %s seconds ...", SYNC_INTERVAL)
6770
scheduler.add_job(agent.process_and_evaluate_all, trigger=IntervalTrigger(seconds=SYNC_INTERVAL))
6871
scheduler.start()
6972

@@ -86,9 +89,9 @@ async def accept_relation(relation_id: str):
8689
"""
8790
Accepts a foreign key relation
8891
"""
89-
logging.warning("Accepting foreign key relation %s", relation_id)
92+
logger.warning("Accepting foreign key relation %s", relation_id)
9093
rc_manager = await get_rc_manager_with_latest_heuristics()
91-
await rc_manager.apply_relation("web", relation_id, manual=True)
94+
await rc_manager.apply_relation(GRAPH_DB_CLIENT_NAME, relation_id, manual=True) # TODO: change client name to something more meaningful
9295

9396
return JSONResponse(status_code=200, content={"message": "Foreign key relation accepted"})
9497

@@ -97,7 +100,7 @@ async def reject_relation(relation_id: str):
97100
"""
98101
Reject a foreign key relation
99102
"""
100-
logging.warning("Rejecting foreign key relation %s", relation_id)
103+
logger.warning("Rejecting foreign key relation %s", relation_id)
101104
rc_manager = await get_rc_manager_with_latest_heuristics()
102105
await rc_manager.unapply_relation(relation_id, manual=True) # setting manual=True will explicitly reject the relation
103106
return JSONResponse(status_code=200, content={"message": "Foreign key relation rejected"})
@@ -107,7 +110,7 @@ async def unreject_relation(relation_id: str):
107110
"""
108111
Undo an accepted or rejected foreign key relation
109112
"""
110-
logging.warning("Un-rejecting foreign key relation %s", relation_id)
113+
logger.warning("Un-rejecting foreign key relation %s", relation_id)
111114
rc_manager = await get_rc_manager_with_latest_heuristics()
112115
await rc_manager.unapply_relation(relation_id) # manual=False by default, so it will be undone without explicitly rejecting
113116
return JSONResponse(status_code=200, content={"message": "Foreign key relation un-rejected"})
@@ -117,7 +120,7 @@ async def evaluate_relation(relation_id: str):
117120
"""
118121
Asks the agent to reevaluate a single foreign key relation
119122
"""
120-
logging.warning("Re-evaluating foreign key relation %s", relation_id)
123+
logger.warning("Re-evaluating foreign key relation %s", relation_id)
121124
rc_manager = await get_rc_manager_with_latest_heuristics()
122125
await agent.evaluate(rc_manager=rc_manager, relation_id=relation_id)
123126
return JSONResponse(status_code=200, content={"message": "Submitted"})
@@ -128,9 +131,9 @@ async def sync_relation(relation_id: str):
128131
"""
129132
Syncs a single foreign key relation with the graph database
130133
"""
131-
logging.warning("Syncing foreign key relation %s", relation_id)
134+
logger.warning("Syncing foreign key relation %s", relation_id)
132135
rc_manager = await get_rc_manager_with_latest_heuristics()
133-
await rc_manager.sync_relation("web", relation_id)
136+
await rc_manager.sync_relation(GRAPH_DB_CLIENT_NAME, relation_id)
134137
return JSONResponse(status_code=200, content={"message": "Submitted"})
135138

136139

@@ -139,7 +142,7 @@ async def regenerate_ontology(background_tasks: BackgroundTasks):
139142
"""
140143
Asks the agent to regenerate the ontology graph based on current foreign key relations in the data graph
141144
"""
142-
logging.warning("Regenerating ontology graph")
145+
logger.warning("Regenerating ontology graph")
143146
if agent.is_processing or agent.is_evaluating:
144147
return JSONResponse(status_code=400, content={"message": "Heuristics processing is in progress"})
145148
background_tasks.add_task(agent.process_and_evaluate_all)
@@ -150,7 +153,7 @@ async def clear_ontology():
150153
"""
151154
Clears all foreign key relations and the ontology graph
152155
"""
153-
logging.warning("Clearing all foreign key relations and the ontology graph")
156+
logger.warning("Clearing all foreign key relations and the ontology graph")
154157
if agent.is_processing or agent.is_evaluating:
155158
return JSONResponse(status_code=400, content={"message": "Heuristics processing is in progress"})
156159
heuristics_version_id = await redis_client.get(constants.KV_HEURISTICS_VERSION_ID_KEY)
@@ -178,7 +181,7 @@ async def process_entity(entity_type: str, primary_key_value: str):
178181
Asks the agent to process a specific entity for heuristics.
179182
For debugging purposes
180183
"""
181-
logging.warning("Processing entity %s:%s for heuristics", entity_type, primary_key_value)
184+
logger.warning("Processing entity %s:%s for heuristics", entity_type, primary_key_value)
182185
rc_manager = await get_rc_manager_with_latest_heuristics()
183186
await agent.process(rc_manager, entity_type, primary_key_value)
184187
return JSONResponse(status_code=200, content={"message": "Submitted for processing"})
@@ -190,7 +193,7 @@ async def process_all(background_tasks: BackgroundTasks):
190193
Asks the agent to process all foreign key relations
191194
For debugging purposes
192195
"""
193-
logging.warning("Processing all heuristics")
196+
logger.warning("Processing all heuristics")
194197
if agent.is_processing:
195198
return JSONResponse(status_code=400, content={"message": "Heuristics processing is already in progress"})
196199

@@ -206,7 +209,7 @@ async def evaluate_all(background_tasks: BackgroundTasks):
206209
Asks the agent to reevaluate all heuristics
207210
For debugging purposes
208211
"""
209-
logging.warning("Re-evaluating all heuristics")
212+
logger.warning("Re-evaluating all heuristics")
210213
if agent.is_processing:
211214
return JSONResponse(status_code=400, content={"message": "Heuristics processing is in progress"})
212215
rc_manager = await get_rc_manager_with_latest_heuristics()
@@ -221,7 +224,7 @@ async def cleanup():
221224
For debugging purposes
222225
"""
223226
rc_manager = await get_rc_manager_with_latest_heuristics()
224-
await rc_manager.cleanup() # This will remove all relations that are no longer candidates, but still exist in the graph database
227+
await rc_manager.cleanup() # This will remove all relations that are no longer candidates, as well as applied relations
225228
return JSONResponse(status_code=200, content={"message": "Submitted"})
226229

227230
#####
@@ -238,8 +241,10 @@ async def status():
238241
"evaluated_tasks_count": agent.evaluated_tasks_count,
239242
"candidate_acceptance_threshold": ACCEPTANCE_THRESHOLD,
240243
"candidate_rejection_threshold": REJECTION_THRESHOLD,
244+
"max_concurrent_processing": MAX_CONCURRENT_PROCESSING,
245+
"max_concurrent_evaluation": MAX_CONCURRENT_EVALUATION,
241246
"min_count_for_eval": MIN_COUNT_FOR_EVAL,
242-
"percent_change_for_eval": PERCENT_CHANGE_FOR_EVAL}
247+
"count_change_threshold_ratio": COUNT_CHANGE_THRESHOLD_RATIO,}
243248

244249
if __name__ == "__main__":
245250
uvicorn.run(app, host="0.0.0.0", port=port)

ai_platform_engineering/knowledge_bases/rag/agent_ontology/src/agent_ontology/tests/test_evaluate_e2e.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
import pytest
66
from core.models import RelationCandidate
77
from core.graph_db.neo4j.graph_db import Neo4jDB
8-
from agent_graph_gen.relation_manager import RelationCandidateManager
9-
from agent_graph_gen.agent import OntologyAgent
8+
from agent_ontology.relation_manager import RelationCandidateManager
9+
from agent_ontology.agent import OntologyAgent
1010
from core.utils import get_default_fresh_until
1111
from core.models import Entity
1212
from core.key_value.base import KVStore
@@ -70,7 +70,7 @@ async def test_each_evaluation():
7070
acceptance_threshold=rc.acceptance_threshold,
7171
rejection_threshold=rc.rejection_threshold,
7272
min_count_for_eval=1,
73-
percent_change_for_eval=0.2,
73+
count_change_threshold_ratio=0.2,
7474
max_concurrent_processing=30,
7575
max_concurrent_evaluation=5)
7676
logging.info("Running heuristics processing...")

ai_platform_engineering/knowledge_bases/rag/agent_ontology/src/agent_ontology/tests/test_heuristics_e2e.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import pytest_asyncio
44
import logging
55
import json
6-
from agent_graph_gen.relation_manager import RelationCandidateManager
6+
from agent_ontology.relation_manager import RelationCandidateManager
77

88
from core.graph_db.neo4j.graph_db import Neo4jDB
99
from core.utils import ObjEncoder, get_default_fresh_until
@@ -13,7 +13,7 @@
1313
from core.models import Entity
1414

1515

16-
from agent_graph_gen.heuristics import HeuristicsProcessor
16+
from agent_ontology.heuristics import HeuristicsProcessor
1717
from core.constants import DEFAULT_LABEL, UPDATED_BY_KEY
1818

1919

0 commit comments

Comments
 (0)