cnoe-io
diff --git a/‎.gitleaksignore‎
Lines changed: 2 additions & 0 deletions b/‎.gitleaksignore‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎ai_platform_engineering/knowledge_bases/rag/agent_ontology/src/agent_ontology/agent.py‎
Lines changed: 74 additions & 77 deletions b/‎ai_platform_engineering/knowledge_bases/rag/agent_ontology/src/agent_ontology/agent.py‎
Lines changed: 74 additions & 77 deletions
diff --git a/‎ai_platform_engineering/knowledge_bases/rag/agent_ontology/src/agent_ontology/relation_manager.py‎
Lines changed: 12 additions & 12 deletions b/‎ai_platform_engineering/knowledge_bases/rag/agent_ontology/src/agent_ontology/relation_manager.py‎
Lines changed: 12 additions & 12 deletions
diff --git a/‎ai_platform_engineering/knowledge_bases/rag/agent_ontology/src/agent_ontology/restapi.py‎
Lines changed: 32 additions & 27 deletions b/‎ai_platform_engineering/knowledge_bases/rag/agent_ontology/src/agent_ontology/restapi.py‎
Lines changed: 32 additions & 27 deletions
diff --git a/‎ai_platform_engineering/knowledge_bases/rag/agent_ontology/src/agent_ontology/tests/test_evaluate_e2e.py‎
Lines changed: 3 additions & 3 deletions b/‎ai_platform_engineering/knowledge_bases/rag/agent_ontology/src/agent_ontology/tests/test_evaluate_e2e.py‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎ai_platform_engineering/knowledge_bases/rag/agent_ontology/src/agent_ontology/tests/test_heuristics_e2e.py‎
Lines changed: 2 additions & 2 deletions b/‎ai_platform_engineering/knowledge_bases/rag/agent_ontology/src/agent_ontology/tests/test_heuristics_e2e.py‎
Lines changed: 2 additions & 2 deletions
@@ -0,0 +1,2 @@
+/github/workspace/ai_platform_engineering/knowledge_bases/rag/common/src/common/graph_db/neo4j/graph_db.py:generic-api-key:48
+/github/workspace/ai_platform_engineering/knowledge_bases/rag/common/src/common/graph_db/neo4j/graph_db.py:generic-api-key:49
@@ -9,6 +9,7 @@
 from common.models.graph import Entity, Relation
 from common.graph_db.base import GraphDB
 
+CLIENT_NAME="relation_manager"
 
 class RelationCandidateManager:
     """
@@ -29,12 +30,13 @@ def __init__(self, graph_db: GraphDB, ontology_graph_db: GraphDB, acceptance_thr
 
     async def cleanup(self):
         """
-        Deletes all relation candidates that are not from the current heuristics version.
-        This is used to reset the relation candidates, e.g. when starting a new dataset.
+        Deletes all relation candidates that are not from the current heuristics version, as well as relations in the data graph that were written by this manager (CLIENT_NAME) under a different heuristics version.
+        TODO: Move to the GraphDB class
         """
         self.logger.info("Cleaning up relation candidates from the database")
         await self.ontology_graph_db.raw_query(f"MATCH ()-[r]->() WHERE r.heuristics_version_id <> '{self.heuristics_version_id}' DELETE r")
         await self.ontology_graph_db.raw_query(f"MATCH (n) WHERE n.heuristics_version_id <> '{self.heuristics_version_id}' DETACH DELETE n")
+        await self.data_graph_db.raw_query(f"MATCH ()-[r]-() WHERE r.{constants.UPDATED_BY_KEY}='{CLIENT_NAME}' AND r.{constants.HEURISTICS_VERSION_ID_KEY} <> '{self.heuristics_version_id}' DELETE r")
 
 
     async def _set_heuristic(self, relation_id: str, candidate: RelationCandidate, recreate: bool = False):
@@ -52,15 +54,15 @@ async def _set_heuristic(self, relation_id: str, candidate: RelationCandidate, r
                 constants.ENTITY_TYPE_NAME_KEY: candidate.heuristic.entity_a_type,
                 constants.HEURISTICS_VERSION_ID_KEY: self.heuristics_version_id
             })
-        await self.ontology_graph_db.update_entity(candidate.heuristic.entity_a_type, [entity_a], fresh_until=utils.get_default_fresh_until(), client_name="relation_manager")
+        await self.ontology_graph_db.update_entity(candidate.heuristic.entity_a_type, [entity_a], fresh_until=utils.get_default_fresh_until(), client_name=CLIENT_NAME)
         entity_b = Entity(
             primary_key_properties=[constants.ENTITY_TYPE_NAME_KEY, constants.HEURISTICS_VERSION_ID_KEY],
             entity_type=candidate.heuristic.entity_b_type,
             all_properties={
                 constants.ENTITY_TYPE_NAME_KEY: candidate.heuristic.entity_b_type,
                 constants.HEURISTICS_VERSION_ID_KEY: self.heuristics_version_id
             })
-        await self.ontology_graph_db.update_entity(candidate.heuristic.entity_b_type, [entity_b], fresh_until=utils.get_default_fresh_until(), client_name="relation_manager")
+        await self.ontology_graph_db.update_entity(candidate.heuristic.entity_b_type, [entity_b], fresh_until=utils.get_default_fresh_until(), client_name=CLIENT_NAME)
 
         # Use the evaluation relation name if available (and accepted)
         relation_name = constants.PLACEHOLDER_RELATION_NAME
@@ -110,7 +112,6 @@ async def _set_heuristic(self, relation_id: str, candidate: RelationCandidate, r
                     "evaluation_last_evaluated": candidate.evaluation.last_evaluated if candidate.evaluation else 0,
                     "evaluation_entity_a_property_values":  utils.json_encode(candidate.evaluation.entity_a_property_values) if candidate.evaluation else None,
                     "evaluation_entity_a_property_counts":  utils.json_encode(candidate.evaluation.entity_a_property_counts) if candidate.evaluation else None,
-                    "evaluation_last_evaluation_count": candidate.evaluation.last_evaluation_count if candidate.evaluation else 0,
 
 
                     "is_applied": candidate.is_applied,
@@ -119,7 +120,7 @@ async def _set_heuristic(self, relation_id: str, candidate: RelationCandidate, r
                 }
             ),
             fresh_until=utils.get_default_fresh_until(),
-            client_name="relation_manager"
+            client_name=CLIENT_NAME
         )
 
 
@@ -155,7 +156,6 @@ async def parse_relation_candidate(self, relation_properties: dict[str, Any]) ->
                 "last_evaluated": relation_properties.get("evaluation_last_evaluated", 0),
                 "entity_a_property_values": json.loads(relation_properties.get("evaluation_entity_a_property_values", "{}")),
                 "entity_a_property_counts": json.loads(relation_properties.get("evaluation_entity_a_property_counts", "{}")),
-                "last_evaluation_count": relation_properties.get("evaluation_last_evaluation_count", 0)
             }
             evaluation = FkeyEvaluation.model_validate(evaluation_data)
 
@@ -310,7 +310,7 @@ async def update_evaluation(self, relation_id: str,
                                 thought: str,
                                 entity_a_property_values: dict[str, List[str]],
                                 entity_a_property_counts: dict[str, int],
-                                evaluation_count: int):
+                                evaluation_heuristic_count: int):
         """
         Updates the evaluation for the given relation_id.
         :param relation_id: The ID of the relation to update.
@@ -320,7 +320,7 @@ async def update_evaluation(self, relation_id: str,
         :param thought: The agent's thoughts about the relation.
         :param entity_a_property_values: The values of the properties of entity_a that were used to evaluate the relation.
         :param entity_a_property_counts: The counts of the properties of entity_a that were used to evaluate the relation.
-        :param evaluation_count: The count of the evaluation.
+        :param evaluation_heuristic_count: The count in heuristics when evaluating.
         """
         self.logger.debug(f"Updating evaluation for {relation_id}")
         # Acquire a lock for the relation_id to avoid concurrent updates to the same heuristic
@@ -336,7 +336,7 @@ async def update_evaluation(self, relation_id: str,
             entity_a_property_values=entity_a_property_values,
             entity_a_property_counts=entity_a_property_counts,
             last_evaluated=int(time.time()),
-            last_evaluation_count=evaluation_count
+            evaluation_heuristic_count=evaluation_heuristic_count
         )
 
         await self._set_heuristic(relation_id, candidate, recreate=True)
@@ -373,7 +373,7 @@ async def apply_relation(self, client_name: str, relation_id: str, manual: bool=
         if candidate.evaluation is None:
             self.logger.warning(f"Relation {relation_id} has no evaluation, cannot apply relation.")
             return
-        self.logger.info(f"Applying relation {relation_id}, {candidate.model_dump_json()}")
+        self.logger.debug(f"Applying relation {relation_id}, {candidate.model_dump_json()}")
         if candidate.evaluation.relation_name is None or candidate.evaluation.relation_name == "":
             self.logger.error(f"Relation {relation_id} has no relation name, cannot apply.")
             return
@@ -422,7 +422,7 @@ async def unapply_relation(self, relation_id: str, manual: bool=False):
         if candidate.evaluation is None:
             self.logger.warning(f"Relation {relation_id} has no evaluation, cannot unapply relation.")
             return
-        self.logger.info(f"Unapplying relation {relation_id}, {candidate.model_dump_json()}")
+        self.logger.debug(f"Unapplying relation {relation_id}, {candidate.model_dump_json()}")
 
         if candidate.evaluation.relation_name is None or candidate.evaluation.relation_name == "":
             self.logger.error(f"Relation {relation_id} has no relation name, cannot undo.")
 
@@ -13,57 +13,60 @@
 import uvicorn
 import redis.asyncio as redis
 
-port = int(os.getenv("SERVER_PORT", 8098))
-
 # Load environment variables from .env file
 dotenv.load_dotenv()
 
-logging = utils.get_logger("rest-server")
+logger = utils.get_logger("restapi")
 
+port = int(os.getenv("SERVER_PORT", 8098))
 SYNC_INTERVAL = int(os.getenv('SYNC_INTERVAL', 21600)) # 6 hours by default
 ACCEPTANCE_THRESHOLD = float(os.getenv('ACCEPTANCE_THRESHOLD', float(0.75))) # > 75% by default
 REJECTION_THRESHOLD = float(os.getenv('REJECTION_THRESHOLD', float(0.3))) # < 30% by default
 MIN_COUNT_FOR_EVAL = int(os.getenv('MIN_COUNT_FOR_EVAL', int(3))) # 3 by default
-PERCENT_CHANGE_FOR_EVAL = float(os.getenv('PERCENT_CHANGE_FOR_EVAL', float(0.1))) # 10% by default
-MAX_CONCURRENT_PROCESSING = int(os.getenv('MAX_CONCURRENT_PROCESSING', int(30))) # 30 by default
-MAX_CONCURRENT_EVALUATION = int(os.getenv('MAX_CONCURRENT_EVALUATION', int(5))) # 5 by default
+COUNT_CHANGE_THRESHOLD_RATIO = float(os.getenv('COUNT_CHANGE_THRESHOLD_RATIO', float(0.1))) # 10% by default
+MAX_CONCURRENT_PROCESSING = int(os.getenv('MAX_CONCURRENT_PROCESSING', int(40))) # 40 by default
+MAX_CONCURRENT_EVALUATION = int(os.getenv('MAX_CONCURRENT_EVALUATION', int(10))) # 10 by default
+
+GRAPH_DB_CLIENT_NAME="web_manual"
 
 scheduler = AsyncIOScheduler()
 
 # Initialize dependencies
-logging.info("Initializing data graph database...")
+logger.info("Initializing data graph database...")
 graph_db: GraphDB = Neo4jDB()
 
-logging.info("Initializing ontology graph database...")
+logger.info("Initializing ontology graph database...")
 ontology_graph_db: GraphDB = Neo4jDB(uri=os.getenv("NEO4J_ONTOLOGY_ADDR", "bolt://localhost:7688"))
 
-logging.info("Initializing key-value store...")
+logger.info("Initializing key-value store...")
 redis_client = redis.from_url(os.getenv("REDIS_URL", "redis://localhost:6379"))
 
-logging.info("Initializing ontology agent...")
+logger.info("Initializing ontology agent...")
+logger.info("Config:\nAcceptance threshold: %s\nRejection threshold: %s\nMax concurrent processing: %s\nMax concurrent evaluation: %s\nCount change threshold ratio: %s\nMin count for eval: %s", 
+            ACCEPTANCE_THRESHOLD, REJECTION_THRESHOLD, MAX_CONCURRENT_PROCESSING, MAX_CONCURRENT_EVALUATION, COUNT_CHANGE_THRESHOLD_RATIO, MIN_COUNT_FOR_EVAL)
 agent: OntologyAgent = OntologyAgent(graph_db=graph_db,
                                         ontology_graph_db=ontology_graph_db,
                                         redis=redis_client,
                                         acceptance_threshold=ACCEPTANCE_THRESHOLD,
                                         rejection_threshold=REJECTION_THRESHOLD,
                                         min_count_for_eval=MIN_COUNT_FOR_EVAL,
-                                        percent_change_for_eval=PERCENT_CHANGE_FOR_EVAL,
+                                        count_change_threshold_ratio=COUNT_CHANGE_THRESHOLD_RATIO,
                                         max_concurrent_processing=MAX_CONCURRENT_PROCESSING,
                                         max_concurrent_evaluation=MAX_CONCURRENT_EVALUATION,
                                     )
 
 
 @asynccontextmanager
 async def lifespan(_: FastAPI):
-    logging.info("Setting up key-value store with heuristics version")
+    logger.info("Setting up key-value store with heuristics version")
 
     # Fetch latest heuristics version
     heuristics_version_id = await redis_client.get(constants.KV_HEURISTICS_VERSION_ID_KEY)
     if heuristics_version_id is None: # if no heuristics version is found, create one
         heuristics_version_id = utils.get_uuid()
         await redis_client.set(constants.KV_HEURISTICS_VERSION_ID_KEY, heuristics_version_id)
 
-    logging.info("Running the ontology agent periodically every %s seconds ...", SYNC_INTERVAL)
+    logger.info("Running the ontology agent periodically every %s seconds ...", SYNC_INTERVAL)
     scheduler.add_job(agent.process_and_evaluate_all, trigger=IntervalTrigger(seconds=SYNC_INTERVAL))
     scheduler.start()
 
@@ -86,9 +89,9 @@ async def accept_relation(relation_id: str):
     """
     Accepts a foreign key relation
     """
-    logging.warning("Accepting foreign key relation %s", relation_id)
+    logger.warning("Accepting foreign key relation %s", relation_id)
     rc_manager = await get_rc_manager_with_latest_heuristics()
-    await rc_manager.apply_relation("web", relation_id, manual=True)
+    await rc_manager.apply_relation(GRAPH_DB_CLIENT_NAME, relation_id, manual=True) # TODO: change client name to something more meaningful
 
     return JSONResponse(status_code=200, content={"message": "Foreign key relation accepted"})
 
@@ -97,7 +100,7 @@ async def reject_relation(relation_id: str):
     """
     Reject a foreign key relation
     """
-    logging.warning("Rejecting foreign key relation %s", relation_id)
+    logger.warning("Rejecting foreign key relation %s", relation_id)
     rc_manager = await get_rc_manager_with_latest_heuristics()
     await rc_manager.unapply_relation(relation_id, manual=True) # setting manual=True will explicitly reject the relation
     return JSONResponse(status_code=200, content={"message": "Foreign key relation rejected"})
@@ -107,7 +110,7 @@ async def unreject_relation(relation_id: str):
     """
     Undo an accepted or rejected foreign key relation
     """
-    logging.warning("Un-rejecting foreign key relation %s", relation_id)
+    logger.warning("Un-rejecting foreign key relation %s", relation_id)
     rc_manager = await get_rc_manager_with_latest_heuristics()
     await rc_manager.unapply_relation(relation_id) # manual=False by default, so it will be undone without explicitly rejecting
     return JSONResponse(status_code=200, content={"message": "Foreign key relation un-rejected"})
@@ -117,7 +120,7 @@ async def evaluate_relation(relation_id: str):
     """
     Asks the agent to reevaluate a single foreign key relation
     """
-    logging.warning("Re-evaluating foreign key relation %s", relation_id)
+    logger.warning("Re-evaluating foreign key relation %s", relation_id)
     rc_manager = await get_rc_manager_with_latest_heuristics()
     await agent.evaluate(rc_manager=rc_manager, relation_id=relation_id)
     return JSONResponse(status_code=200, content={"message": "Submitted"})
@@ -128,9 +131,9 @@ async def sync_relation(relation_id: str):
     """
     Syncs a single foreign key relation with the graph database
     """
-    logging.warning("Syncing foreign key relation %s", relation_id)
+    logger.warning("Syncing foreign key relation %s", relation_id)
     rc_manager = await get_rc_manager_with_latest_heuristics()
-    await rc_manager.sync_relation("web", relation_id)
+    await rc_manager.sync_relation(GRAPH_DB_CLIENT_NAME, relation_id)
     return JSONResponse(status_code=200, content={"message": "Submitted"})
 
 
@@ -139,7 +142,7 @@ async def regenerate_ontology(background_tasks: BackgroundTasks):
     """
     Asks the agent to regenerate the ontology graph based on current foreign key relations in the data graph
     """
-    logging.warning("Regenerating ontology graph")
+    logger.warning("Regenerating ontology graph")
     if agent.is_processing or agent.is_evaluating:
         return JSONResponse(status_code=400, content={"message": "Heuristics processing is in progress"})
     background_tasks.add_task(agent.process_and_evaluate_all)
@@ -150,7 +153,7 @@ async def clear_ontology():
     """
     Clears all foreign key relations and the ontology graph
     """
-    logging.warning("Clearing all foreign key relations and the ontology graph")
+    logger.warning("Clearing all foreign key relations and the ontology graph")
     if agent.is_processing or agent.is_evaluating:
         return JSONResponse(status_code=400, content={"message": "Heuristics processing is in progress"})
     heuristics_version_id = await redis_client.get(constants.KV_HEURISTICS_VERSION_ID_KEY)
@@ -178,7 +181,7 @@ async def process_entity(entity_type: str, primary_key_value: str):
     Asks the agent to process a specific entity for heuristics, this is used for debugging
     For debugging purposes
     """
-    logging.warning("Processing entity %s:%s for heuristics", entity_type, primary_key_value)
+    logger.warning("Processing entity %s:%s for heuristics", entity_type, primary_key_value)
     rc_manager = await get_rc_manager_with_latest_heuristics()
     await agent.process(rc_manager, entity_type, primary_key_value)
     return JSONResponse(status_code=200, content={"message": "Submitted for processing"})
@@ -190,7 +193,7 @@ async def process_all(background_tasks: BackgroundTasks):
     Asks the agent to process all foreign key relations
     For debugging purposes
     """
-    logging.warning("Processing all heuristics")
+    logger.warning("Processing all heuristics")
     if agent.is_processing:
         return JSONResponse(status_code=400, content={"message": "Heuristics processing is already in progress"})
 
@@ -206,7 +209,7 @@ async def evaluate_all(background_tasks: BackgroundTasks):
     Asks the agent to reevaluate all heuristics
     For debugging purposes
     """
-    logging.warning("Re-evaluating all heuristics")
+    logger.warning("Re-evaluating all heuristics")
     if agent.is_processing:
         return JSONResponse(status_code=400, content={"message": "Heuristics processing is in progress"})
     rc_manager = await get_rc_manager_with_latest_heuristics()
@@ -221,7 +224,7 @@ async def cleanup():
     For debugging purposes
     """
     rc_manager = await get_rc_manager_with_latest_heuristics()
-    await rc_manager.cleanup() # This will remove all relations that are no longer candidates, but still exist in the graph database
+    await rc_manager.cleanup() # This will remove all relations that are no longer candidates, as well as applied relations
     return JSONResponse(status_code=200, content={"message": "Submitted"})
 
 #####
@@ -238,8 +241,10 @@ async def status():
             "evaluated_tasks_count": agent.evaluated_tasks_count,
             "candidate_acceptance_threshold": ACCEPTANCE_THRESHOLD, 
             "candidate_rejection_threshold": REJECTION_THRESHOLD, 
+            "max_concurrent_processing": MAX_CONCURRENT_PROCESSING,
+            "max_concurrent_evaluation": MAX_CONCURRENT_EVALUATION,
             "min_count_for_eval": MIN_COUNT_FOR_EVAL,   
-            "percent_change_for_eval": PERCENT_CHANGE_FOR_EVAL}
+            "count_change_threshold_ratio": COUNT_CHANGE_THRESHOLD_RATIO,}
 
 if __name__ == "__main__":
     uvicorn.run(app, host="0.0.0.0", port=port)
@@ -5,8 +5,8 @@
 import pytest
 from core.models import RelationCandidate
 from core.graph_db.neo4j.graph_db import Neo4jDB
-from agent_graph_gen.relation_manager import RelationCandidateManager
-from agent_graph_gen.agent import OntologyAgent
+from agent_ontology.relation_manager import RelationCandidateManager
+from agent_ontology.agent import OntologyAgent
 from core.utils import get_default_fresh_until
 from core.models import Entity
 from core.key_value.base import KVStore
@@ -70,7 +70,7 @@ async def test_each_evaluation():
             acceptance_threshold=rc.acceptance_threshold, 
             rejection_threshold=rc.rejection_threshold, 
             min_count_for_eval=1, 
-            percent_change_for_eval=0.2, 
+            count_change_threshold_ratio=0.2, 
             max_concurrent_processing=30, 
             max_concurrent_evaluation=5)
         logging.info("Running heuristics processing...")
 
@@ -3,7 +3,7 @@
 import pytest_asyncio
 import logging
 import json
-from agent_graph_gen.relation_manager import RelationCandidateManager
+from agent_ontology.relation_manager import RelationCandidateManager
 
 from core.graph_db.neo4j.graph_db import Neo4jDB
 from core.utils import ObjEncoder, get_default_fresh_until
@@ -13,7 +13,7 @@
 from core.models import Entity
 
 
-from agent_graph_gen.heuristics import HeuristicsProcessor
+from agent_ontology.heuristics import HeuristicsProcessor
 from core.constants import DEFAULT_LABEL, UPDATED_BY_KEY
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+/github/workspace/ai_platform_engineering/knowledge_bases/rag/common/src/common/graph_db/neo4j/graph_db.py:generic-api-key:48`
	`2`	`+/github/workspace/ai_platform_engineering/knowledge_bases/rag/common/src/common/graph_db/neo4j/graph_db.py:generic-api-key:49`