From 551c9958d3e0869066df2294e9ebacf5466a9314 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 13 May 2025 15:45:06 +0200 Subject: [PATCH 001/114] build: third party integration first commit --- cognition_objects/third_party_integration.py | 93 ++++++++++++++++++++ enums.py | 17 ++++ models.py | 31 +++++++ 3 files changed, 141 insertions(+) create mode 100644 cognition_objects/third_party_integration.py diff --git a/cognition_objects/third_party_integration.py b/cognition_objects/third_party_integration.py new file mode 100644 index 00000000..e074ff64 --- /dev/null +++ b/cognition_objects/third_party_integration.py @@ -0,0 +1,93 @@ +from typing import List, Optional, Dict +from datetime import datetime + +from ..business_objects import general +from ..session import session +from ..models import CognitionThirdPartyIntegration + + +def get_by_id(id: str) -> CognitionThirdPartyIntegration: + return ( + session.query(CognitionThirdPartyIntegration) + .filter(CognitionThirdPartyIntegration.id == id) + .first() + ) + + +def get(project_id: str, name: str) -> CognitionThirdPartyIntegration: + return ( + session.query(CognitionThirdPartyIntegration) + .filter( + CognitionThirdPartyIntegration.project_id == project_id, + CognitionThirdPartyIntegration.name == name, + ) + .first() + ) + + +def get_all_by_project_id(project_id: str) -> List[CognitionThirdPartyIntegration]: + return ( + session.query(CognitionThirdPartyIntegration) + .filter( + CognitionThirdPartyIntegration.project_id == project_id, + ) + .order_by(CognitionThirdPartyIntegration.created_at.asc()) + .all() + ) + + +def create( + project_id: str, + user_id: str, + name: str, + description: str, + integration_type: str, + integration_config: Dict, + llm_config: Dict, + with_commit: bool = True, + created_at: Optional[datetime] = None, + id: Optional[str] = None, +) -> CognitionThirdPartyIntegration: + integration: CognitionThirdPartyIntegration = CognitionThirdPartyIntegration( + id=id, + project_id=project_id, + created_by=user_id, + created_at=created_at, + name=name, + description=description, + type=integration_type, + config=integration_config, + llm_config=llm_config, + ) + general.add(integration, with_commit) + + return integration + + +def update( + id: str, + name: Optional[str] = None, + description: Optional[str] = None, + integration_config: Optional[int] = None, + llm_config: Optional[Dict] = None, + with_commit: bool = True, +) -> CognitionThirdPartyIntegration: + integration: CognitionThirdPartyIntegration = get_by_id(id) + + if name is not None: + integration.name = name + if description is not None: + integration.description = description + if integration_config is not None: + integration.config = integration_config + if llm_config is not None: + integration.llm_config = llm_config + general.flush_or_commit(with_commit) + return integration + + +def delete(id: str, with_commit: bool = True) -> None: + session.query(CognitionThirdPartyIntegration).filter( + CognitionThirdPartyIntegration.id == id + ).delete() + general.flush_or_commit(with_commit) diff --git a/enums.py b/enums.py index 0a920a81..1070cc99 100644 --- a/enums.py +++ b/enums.py @@ -155,6 +155,7 @@ class Tablenames(Enum): EVALUATION_RUN = "evaluation_run" PLAYGROUND_QUESTION = "playground_question" FULL_ADMIN_ACCESS = "full_admin_access" + THIRD_PARTY_INTEGRATION = "third_party_integration" def snake_case_to_pascal_case(self): # the type name (written in PascalCase) of a table is needed to create backrefs @@ -493,6 +494,7 @@ class TaskType(Enum): TASK_QUEUE_ACTION = "task_queue_action" RUN_COGNITION_MACRO = "RUN_COGNITION_MACRO" PARSE_COGNITION_FILE = "PARSE_COGNITION_FILE" + THIRD_PARTY_INTEGRATION = "THIRD_PARTY_INTEGRATION" class TaskQueueAction(Enum): @@ -871,3 +873,18 @@ class EvaluationRunState(Enum): RUNNING = "RUNNING" SUCCESS = "SUCCESS" FAILED = "FAILED" + + +class CognitionThirdPartyIntegrationType(Enum): + # CSV = "CSV" + # JSON = "JSON" + # PDF = "PDF" TODO: how to handle ETL + # XLSX = "XLSX" + WEBPAGE = "WEBPAGE" + GITHUB = "GITHUB" + + def all(): + return [ + CognitionThirdPartyIntegrationType.WEBPAGE.value, + CognitionThirdPartyIntegrationType.GITHUB.value, + ] diff --git a/models.py b/models.py index 1dae54d2..1a50b6eb 100644 --- a/models.py +++ b/models.py @@ -2070,3 +2070,34 @@ class FullAdminAccess(Base): id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) email = Column(String, unique=True) meta_info = Column(JSON) + + +class CognitionThirdPartyIntegration(Base): + __tablename__ = Tablenames.THIRD_PARTY_INTEGRATION.value + __table_args__ = {"schema": "cognition"} + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + project_id = Column( + UUID(as_uuid=True), + ForeignKey(f"{Tablenames.PROJECT.value}.id", ondelete="CASCADE"), + index=True, + ) + created_by = Column( + UUID(as_uuid=True), + ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET NULL"), + index=True, + ) + created_at = Column(DateTime, default=sql.func.now()) + name = Column(String) + description = Column(String) + state = Column(String) # of type enums.CognitionMarkdownFileState.*.value + type = Column(String) # of type enums.CognitionThirdPartyIntegrationType.*.value + config = Column(JSON) + """JSON object that contains the configuration for the integration type. + Examples: + - For a webhook integration, it might contain the URL and headers. + - For an API integration, it might contain the API key and endpoint. + - For a database integration, it might contain the connection string and credentials. + + """ + + llm_config = Column(JSON) From e115cac53e7bf909ed308693d8fff9cf64b03f56 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Wed, 14 May 2025 11:13:42 +0200 Subject: [PATCH 002/114] chore: update enums --- cognition_objects/third_party_integration.py | 26 +++++++++++++++++ enums.py | 30 ++++++++++++++------ models.py | 1 + 3 files changed, 48 insertions(+), 9 deletions(-) diff --git a/cognition_objects/third_party_integration.py b/cognition_objects/third_party_integration.py index e074ff64..88f09e5b 100644 --- a/cognition_objects/third_party_integration.py +++ b/cognition_objects/third_party_integration.py @@ -1,9 +1,11 @@ from typing import List, Optional, Dict from datetime import datetime +from fastapi import HTTPException from ..business_objects import general from ..session import session from ..models import CognitionThirdPartyIntegration +from ..enums import CognitionMarkdownFileState def get_by_id(id: str) -> CognitionThirdPartyIntegration: @@ -41,6 +43,7 @@ def create( user_id: str, name: str, description: str, + state: str, integration_type: str, integration_config: Dict, llm_config: Dict, @@ -48,6 +51,8 @@ def create( created_at: Optional[datetime] = None, id: Optional[str] = None, ) -> CognitionThirdPartyIntegration: + if state not in CognitionMarkdownFileState.all(): + raise HTTPException(status_code=400, detail=f"Invalid state: {state}") integration: CognitionThirdPartyIntegration = CognitionThirdPartyIntegration( id=id, project_id=project_id, @@ -55,6 +60,7 @@ def create( created_at=created_at, name=name, description=description, + state=state, type=integration_type, config=integration_config, llm_config=llm_config, @@ -68,8 +74,10 @@ def update( id: str, name: Optional[str] = None, description: Optional[str] = None, + state: Optional[CognitionMarkdownFileState] = None, integration_config: Optional[int] = None, llm_config: Optional[Dict] = None, + error_message: Optional[str] = None, with_commit: bool = True, ) -> CognitionThirdPartyIntegration: integration: CognitionThirdPartyIntegration = get_by_id(id) @@ -78,14 +86,32 @@ def update( integration.name = name if description is not None: integration.description = description + if state is not None: + if state not in CognitionMarkdownFileState.all(): + raise HTTPException(status_code=400, detail=f"Invalid state: {state}") + integration.state = state if integration_config is not None: integration.config = integration_config if llm_config is not None: integration.llm_config = llm_config + if error_message is not None: + integration.error_message = error_message general.flush_or_commit(with_commit) return integration +def execution_finished(id: str) -> bool: + return bool( + session.query(CognitionThirdPartyIntegration) + .filter( + CognitionThirdPartyIntegration.id == id, + CognitionThirdPartyIntegration.state + == CognitionMarkdownFileState.FINISHED.value, + ) + .first() + ) + + def delete(id: str, with_commit: bool = True) -> None: session.query(CognitionThirdPartyIntegration).filter( CognitionThirdPartyIntegration.id == id diff --git a/enums.py b/enums.py index 1070cc99..fd771d70 100644 --- a/enums.py +++ b/enums.py @@ -679,6 +679,17 @@ class CognitionMarkdownFileState(Enum): FINISHED = "FINISHED" FAILED = "FAILED" + def all(): + return [ + CognitionMarkdownFileState.QUEUE.value, + CognitionMarkdownFileState.EXTRACTING.value, + CognitionMarkdownFileState.TOKENIZING.value, + CognitionMarkdownFileState.SPLITTING.value, + CognitionMarkdownFileState.TRANSFORMING.value, + CognitionMarkdownFileState.FINISHED.value, + CognitionMarkdownFileState.FAILED.value, + ] + class CognitionInterfaceType(Enum): CHAT = "CHAT" @@ -876,15 +887,16 @@ class EvaluationRunState(Enum): class CognitionThirdPartyIntegrationType(Enum): + # PDF = "PDF" TODO: how to handle ETL # CSV = "CSV" # JSON = "JSON" - # PDF = "PDF" TODO: how to handle ETL + # DOCX = "DOCX" # XLSX = "XLSX" - WEBPAGE = "WEBPAGE" - GITHUB = "GITHUB" - - def all(): - return [ - CognitionThirdPartyIntegrationType.WEBPAGE.value, - CognitionThirdPartyIntegrationType.GITHUB.value, - ] + # WEBPAGE = "WEBPAGE" + SQL = "SQL" + GITHUB_FILE = "GITHUB_FILE" + GITHUB_ISSUE = "GITHUB_ISSUE" + + @classmethod + def all(cls): + return [e.value for e in cls] diff --git a/models.py b/models.py index 1a50b6eb..9ecebf60 100644 --- a/models.py +++ b/models.py @@ -2101,3 +2101,4 @@ class CognitionThirdPartyIntegration(Base): """ llm_config = Column(JSON) + error_message = Column(String) From 4d9970a043be39672eacf72dc54b93e0b0e5fe0e Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 15 May 2025 15:19:15 +0200 Subject: [PATCH 003/114] perf: add integration acess --- cognition_objects/integration_access.py | 64 +++++++++++++++++++++++++ enums.py | 13 ++++- models.py | 20 ++++++++ 3 files changed, 96 insertions(+), 1 deletion(-) create mode 100644 cognition_objects/integration_access.py diff --git a/cognition_objects/integration_access.py b/cognition_objects/integration_access.py new file mode 100644 index 00000000..657caf06 --- /dev/null +++ b/cognition_objects/integration_access.py @@ -0,0 +1,64 @@ +from typing import List, Optional +from datetime import datetime + +from ..business_objects import general +from ..session import session +from ..models import CognitionOrganizationIntegrationAccess +from ..enums import CognitionThirdPartyIntegrationType + + +def get_by_id(id: str) -> CognitionOrganizationIntegrationAccess: + return ( + session.query(CognitionOrganizationIntegrationAccess) + .filter(CognitionOrganizationIntegrationAccess.id == id) + .first() + ) + + +def get_by_org_id(org_id: str) -> List[CognitionOrganizationIntegrationAccess]: + return ( + session.query(CognitionOrganizationIntegrationAccess) + .filter(CognitionOrganizationIntegrationAccess.organization_id == org_id) + .all() + ) + + +def get( + org_id: str, integration_type: CognitionThirdPartyIntegrationType +) -> List[CognitionOrganizationIntegrationAccess]: + return ( + session.query(CognitionOrganizationIntegrationAccess) + .filter( + CognitionOrganizationIntegrationAccess.organization_id == org_id, + CognitionOrganizationIntegrationAccess.integration_type == integration_type, + ) + .order_by(CognitionOrganizationIntegrationAccess.created_at.asc()) + .all() + ) + + +def create( + org_id: str, + user_id: str, + integration_type: CognitionThirdPartyIntegrationType, + with_commit: bool = True, + created_at: Optional[datetime] = None, +) -> CognitionOrganizationIntegrationAccess: + integration_access: CognitionOrganizationIntegrationAccess = ( + CognitionOrganizationIntegrationAccess( + org_id=org_id, + created_by=user_id, + created_at=created_at, + integration_type=integration_type, + ) + ) + general.add(integration_access, with_commit) + + return integration_access + + +def delete(id: str, with_commit: bool = True) -> None: + session.query(CognitionOrganizationIntegrationAccess).filter( + CognitionOrganizationIntegrationAccess.id == id + ).delete() + general.flush_or_commit(with_commit) diff --git a/enums.py b/enums.py index fd771d70..20bc502a 100644 --- a/enums.py +++ b/enums.py @@ -156,6 +156,7 @@ class Tablenames(Enum): PLAYGROUND_QUESTION = "playground_question" FULL_ADMIN_ACCESS = "full_admin_access" THIRD_PARTY_INTEGRATION = "third_party_integration" + ORGANIZATION_INTEGRATION_ACCESS = "integration_organization_access" def snake_case_to_pascal_case(self): # the type name (written in PascalCase) of a table is needed to create backrefs @@ -887,7 +888,6 @@ class EvaluationRunState(Enum): class CognitionThirdPartyIntegrationType(Enum): - # PDF = "PDF" TODO: how to handle ETL # CSV = "CSV" # JSON = "JSON" # DOCX = "DOCX" @@ -896,6 +896,17 @@ class CognitionThirdPartyIntegrationType(Enum): SQL = "SQL" GITHUB_FILE = "GITHUB_FILE" GITHUB_ISSUE = "GITHUB_ISSUE" + PDF = "PDF" + + @staticmethod + def from_string(value: str): + changed_value = value.upper().replace(" ", "_").replace("-", "_") + try: + return CognitionThirdPartyIntegrationType[changed_value] + except KeyError: + raise ValueError( + "Could not parse CognitionThirdPartyIntegrationType from string" + ) @classmethod def all(cls): diff --git a/models.py b/models.py index 9ecebf60..6603b8c1 100644 --- a/models.py +++ b/models.py @@ -2102,3 +2102,23 @@ class CognitionThirdPartyIntegration(Base): llm_config = Column(JSON) error_message = Column(String) + + +class CognitionOrganizationIntegrationAccess(Base): + __tablename__ = Tablenames.ORGANIZATION_INTEGRATION_ACCESS.value + __table_args__ = {"schema": "cognition"} + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + created_by = Column( + UUID(as_uuid=True), + ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET NULL"), + index=True, + ) + created_at = Column(DateTime, default=sql.func.now()) + organization_id = Column( + UUID(as_uuid=True), + ForeignKey(f"{Tablenames.ORGANIZATION.value}.id", ondelete="CASCADE"), + index=True, + ) + integration_type = Column( + String + ) # of type enums.CognitionThirdPartyIntegrationType.*.value From 80712fce5257513a3139584a1dec555ff0bb15ba Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 15 May 2025 23:31:54 +0200 Subject: [PATCH 004/114] perf: rename to integration --- ...rd_party_integration.py => integration.py} | 46 +++++++++--------- cognition_objects/integration_access.py | 48 +++++++++---------- enums.py | 18 +++---- models.py | 14 +++--- 4 files changed, 58 insertions(+), 68 deletions(-) rename cognition_objects/{third_party_integration.py => integration.py} (62%) diff --git a/cognition_objects/third_party_integration.py b/cognition_objects/integration.py similarity index 62% rename from cognition_objects/third_party_integration.py rename to cognition_objects/integration.py index 88f09e5b..5dd561ae 100644 --- a/cognition_objects/third_party_integration.py +++ b/cognition_objects/integration.py @@ -4,36 +4,36 @@ from ..business_objects import general from ..session import session -from ..models import CognitionThirdPartyIntegration +from ..models import CognitionIntegration from ..enums import CognitionMarkdownFileState -def get_by_id(id: str) -> CognitionThirdPartyIntegration: +def get_by_id(id: str) -> CognitionIntegration: return ( - session.query(CognitionThirdPartyIntegration) - .filter(CognitionThirdPartyIntegration.id == id) + session.query(CognitionIntegration) + .filter(CognitionIntegration.id == id) .first() ) -def get(project_id: str, name: str) -> CognitionThirdPartyIntegration: +def get(project_id: str, name: str) -> CognitionIntegration: return ( - session.query(CognitionThirdPartyIntegration) + session.query(CognitionIntegration) .filter( - CognitionThirdPartyIntegration.project_id == project_id, - CognitionThirdPartyIntegration.name == name, + CognitionIntegration.project_id == project_id, + CognitionIntegration.name == name, ) .first() ) -def get_all_by_project_id(project_id: str) -> List[CognitionThirdPartyIntegration]: +def get_all_by_project_id(project_id: str) -> List[CognitionIntegration]: return ( - session.query(CognitionThirdPartyIntegration) + session.query(CognitionIntegration) .filter( - CognitionThirdPartyIntegration.project_id == project_id, + CognitionIntegration.project_id == project_id, ) - .order_by(CognitionThirdPartyIntegration.created_at.asc()) + .order_by(CognitionIntegration.created_at.asc()) .all() ) @@ -50,10 +50,10 @@ def create( with_commit: bool = True, created_at: Optional[datetime] = None, id: Optional[str] = None, -) -> CognitionThirdPartyIntegration: +) -> CognitionIntegration: if state not in CognitionMarkdownFileState.all(): raise HTTPException(status_code=400, detail=f"Invalid state: {state}") - integration: CognitionThirdPartyIntegration = CognitionThirdPartyIntegration( + integration: CognitionIntegration = CognitionIntegration( id=id, project_id=project_id, created_by=user_id, @@ -79,8 +79,8 @@ def update( llm_config: Optional[Dict] = None, error_message: Optional[str] = None, with_commit: bool = True, -) -> CognitionThirdPartyIntegration: - integration: CognitionThirdPartyIntegration = get_by_id(id) +) -> CognitionIntegration: + integration: CognitionIntegration = get_by_id(id) if name is not None: integration.name = name @@ -96,24 +96,22 @@ def update( integration.llm_config = llm_config if error_message is not None: integration.error_message = error_message - general.flush_or_commit(with_commit) + + general.add(integration, with_commit) return integration def execution_finished(id: str) -> bool: return bool( - session.query(CognitionThirdPartyIntegration) + session.query(CognitionIntegration) .filter( - CognitionThirdPartyIntegration.id == id, - CognitionThirdPartyIntegration.state - == CognitionMarkdownFileState.FINISHED.value, + CognitionIntegration.id == id, + CognitionIntegration.state == CognitionMarkdownFileState.FINISHED.value, ) .first() ) def delete(id: str, with_commit: bool = True) -> None: - session.query(CognitionThirdPartyIntegration).filter( - CognitionThirdPartyIntegration.id == id - ).delete() + session.query(CognitionIntegration).filter(CognitionIntegration.id == id).delete() general.flush_or_commit(with_commit) diff --git a/cognition_objects/integration_access.py b/cognition_objects/integration_access.py index 657caf06..0aa1534c 100644 --- a/cognition_objects/integration_access.py +++ b/cognition_objects/integration_access.py @@ -3,36 +3,36 @@ from ..business_objects import general from ..session import session -from ..models import CognitionOrganizationIntegrationAccess -from ..enums import CognitionThirdPartyIntegrationType +from ..models import CognitionIntegrationAccess +from ..enums import CognitionIntegrationType -def get_by_id(id: str) -> CognitionOrganizationIntegrationAccess: +def get_by_id(id: str) -> CognitionIntegrationAccess: return ( - session.query(CognitionOrganizationIntegrationAccess) - .filter(CognitionOrganizationIntegrationAccess.id == id) + session.query(CognitionIntegrationAccess) + .filter(CognitionIntegrationAccess.id == id) .first() ) -def get_by_org_id(org_id: str) -> List[CognitionOrganizationIntegrationAccess]: +def get_by_org_id(org_id: str) -> List[CognitionIntegrationAccess]: return ( - session.query(CognitionOrganizationIntegrationAccess) - .filter(CognitionOrganizationIntegrationAccess.organization_id == org_id) + session.query(CognitionIntegrationAccess) + .filter(CognitionIntegrationAccess.organization_id == org_id) .all() ) def get( - org_id: str, integration_type: CognitionThirdPartyIntegrationType -) -> List[CognitionOrganizationIntegrationAccess]: + org_id: str, integration_type: CognitionIntegrationType +) -> List[CognitionIntegrationAccess]: return ( - session.query(CognitionOrganizationIntegrationAccess) + session.query(CognitionIntegrationAccess) .filter( - CognitionOrganizationIntegrationAccess.organization_id == org_id, - CognitionOrganizationIntegrationAccess.integration_type == integration_type, + CognitionIntegrationAccess.organization_id == org_id, + CognitionIntegrationAccess.integration_type == integration_type, ) - .order_by(CognitionOrganizationIntegrationAccess.created_at.asc()) + .order_by(CognitionIntegrationAccess.created_at.asc()) .all() ) @@ -40,17 +40,15 @@ def get( def create( org_id: str, user_id: str, - integration_type: CognitionThirdPartyIntegrationType, + integration_type: CognitionIntegrationType, with_commit: bool = True, created_at: Optional[datetime] = None, -) -> CognitionOrganizationIntegrationAccess: - integration_access: CognitionOrganizationIntegrationAccess = ( - CognitionOrganizationIntegrationAccess( - org_id=org_id, - created_by=user_id, - created_at=created_at, - integration_type=integration_type, - ) +) -> CognitionIntegrationAccess: + integration_access: CognitionIntegrationAccess = CognitionIntegrationAccess( + org_id=org_id, + created_by=user_id, + created_at=created_at, + integration_type=integration_type, ) general.add(integration_access, with_commit) @@ -58,7 +56,7 @@ def create( def delete(id: str, with_commit: bool = True) -> None: - session.query(CognitionOrganizationIntegrationAccess).filter( - CognitionOrganizationIntegrationAccess.id == id + session.query(CognitionIntegrationAccess).filter( + CognitionIntegrationAccess.id == id ).delete() general.flush_or_commit(with_commit) diff --git a/enums.py b/enums.py index 20bc502a..39233717 100644 --- a/enums.py +++ b/enums.py @@ -155,8 +155,8 @@ class Tablenames(Enum): EVALUATION_RUN = "evaluation_run" PLAYGROUND_QUESTION = "playground_question" FULL_ADMIN_ACCESS = "full_admin_access" - THIRD_PARTY_INTEGRATION = "third_party_integration" - ORGANIZATION_INTEGRATION_ACCESS = "integration_organization_access" + INTEGRATION = "integration" + INTEGRATION_ACCESS = "integration_access" def snake_case_to_pascal_case(self): # the type name (written in PascalCase) of a table is needed to create backrefs @@ -495,7 +495,7 @@ class TaskType(Enum): TASK_QUEUE_ACTION = "task_queue_action" RUN_COGNITION_MACRO = "RUN_COGNITION_MACRO" PARSE_COGNITION_FILE = "PARSE_COGNITION_FILE" - THIRD_PARTY_INTEGRATION = "THIRD_PARTY_INTEGRATION" + INTEGRATION = "INTEGRATION" class TaskQueueAction(Enum): @@ -887,7 +887,7 @@ class EvaluationRunState(Enum): FAILED = "FAILED" -class CognitionThirdPartyIntegrationType(Enum): +class CognitionIntegrationType(Enum): # CSV = "CSV" # JSON = "JSON" # DOCX = "DOCX" @@ -902,12 +902,8 @@ class CognitionThirdPartyIntegrationType(Enum): def from_string(value: str): changed_value = value.upper().replace(" ", "_").replace("-", "_") try: - return CognitionThirdPartyIntegrationType[changed_value] + return CognitionIntegrationType[changed_value] except KeyError: - raise ValueError( - "Could not parse CognitionThirdPartyIntegrationType from string" + raise KeyError( + f"Could not parse CognitionIntegrationType from string '{changed_value}'" ) - - @classmethod - def all(cls): - return [e.value for e in cls] diff --git a/models.py b/models.py index 6603b8c1..5a22a34a 100644 --- a/models.py +++ b/models.py @@ -2072,8 +2072,8 @@ class FullAdminAccess(Base): meta_info = Column(JSON) -class CognitionThirdPartyIntegration(Base): - __tablename__ = Tablenames.THIRD_PARTY_INTEGRATION.value +class CognitionIntegration(Base): + __tablename__ = Tablenames.INTEGRATION.value __table_args__ = {"schema": "cognition"} id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) project_id = Column( @@ -2090,7 +2090,7 @@ class CognitionThirdPartyIntegration(Base): name = Column(String) description = Column(String) state = Column(String) # of type enums.CognitionMarkdownFileState.*.value - type = Column(String) # of type enums.CognitionThirdPartyIntegrationType.*.value + type = Column(String) # of type enums.CognitionIntegrationType.*.value config = Column(JSON) """JSON object that contains the configuration for the integration type. Examples: @@ -2104,8 +2104,8 @@ class CognitionThirdPartyIntegration(Base): error_message = Column(String) -class CognitionOrganizationIntegrationAccess(Base): - __tablename__ = Tablenames.ORGANIZATION_INTEGRATION_ACCESS.value +class CognitionIntegrationAccess(Base): + __tablename__ = Tablenames.INTEGRATION_ACCESS.value __table_args__ = {"schema": "cognition"} id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) created_by = Column( @@ -2119,6 +2119,4 @@ class CognitionOrganizationIntegrationAccess(Base): ForeignKey(f"{Tablenames.ORGANIZATION.value}.id", ondelete="CASCADE"), index=True, ) - integration_type = Column( - String - ) # of type enums.CognitionThirdPartyIntegrationType.*.value + integration_type = Column(String) # of type enums.CognitionIntegrationType.*.value From fbd5a856dd9f13746f9f4c4a72894a40cffb8983 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 16 May 2025 11:49:36 +0200 Subject: [PATCH 005/114] perf: add last_extraction column to integration --- models.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/models.py b/models.py index 5a22a34a..a3ca46b6 100644 --- a/models.py +++ b/models.py @@ -2102,6 +2102,11 @@ class CognitionIntegration(Base): llm_config = Column(JSON) error_message = Column(String) + last_extraction = Column(JSON, default={}) + # Information relevant for "delta" extraction. Varies based on the integration type. + # e.g. for github issue => last timestamp + # e.g. for github file => file name + SHA + # e.g. for PDF => file name + page number class CognitionIntegrationAccess(Base): From 0dbad39a76585470f9e248a3a6ed8e05558f45e6 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 16 May 2025 13:43:26 +0200 Subject: [PATCH 006/114] perf: update integration delta --- cognition_objects/integration.py | 5 +++++ models.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index 5dd561ae..1a94277f 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -47,6 +47,7 @@ def create( integration_type: str, integration_config: Dict, llm_config: Dict, + last_extraction: Optional[Dict] = {}, with_commit: bool = True, created_at: Optional[datetime] = None, id: Optional[str] = None, @@ -64,6 +65,7 @@ def create( type=integration_type, config=integration_config, llm_config=llm_config, + last_extraction=last_extraction, ) general.add(integration, with_commit) @@ -78,6 +80,7 @@ def update( integration_config: Optional[int] = None, llm_config: Optional[Dict] = None, error_message: Optional[str] = None, + last_extraction: Optional[Dict] = None, with_commit: bool = True, ) -> CognitionIntegration: integration: CognitionIntegration = get_by_id(id) @@ -96,6 +99,8 @@ def update( integration.llm_config = llm_config if error_message is not None: integration.error_message = error_message + if last_extraction is not None: + integration.last_extraction = last_extraction general.add(integration, with_commit) return integration diff --git a/models.py b/models.py index a3ca46b6..5c211ad5 100644 --- a/models.py +++ b/models.py @@ -2102,7 +2102,7 @@ class CognitionIntegration(Base): llm_config = Column(JSON) error_message = Column(String) - last_extraction = Column(JSON, default={}) + last_extraction = Column(JSON) # Information relevant for "delta" extraction. Varies based on the integration type. # e.g. for github issue => last timestamp # e.g. for github file => file name + SHA From 458e4dcf2255079d56259376339ce7e2e75e57f1 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 16 May 2025 15:23:15 +0200 Subject: [PATCH 007/114] perf: update integration access to list types --- cognition_objects/integration_access.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cognition_objects/integration_access.py b/cognition_objects/integration_access.py index 0aa1534c..ed247c67 100644 --- a/cognition_objects/integration_access.py +++ b/cognition_objects/integration_access.py @@ -40,7 +40,7 @@ def get( def create( org_id: str, user_id: str, - integration_type: CognitionIntegrationType, + integration_types: List[CognitionIntegrationType], with_commit: bool = True, created_at: Optional[datetime] = None, ) -> CognitionIntegrationAccess: @@ -48,7 +48,7 @@ def create( org_id=org_id, created_by=user_id, created_at=created_at, - integration_type=integration_type, + integration_types=integration_types, ) general.add(integration_access, with_commit) From 7d70cd4206b3fd6b8f343757317a852888ffefda Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 16 May 2025 15:53:14 +0200 Subject: [PATCH 008/114] perf: add integration_types to integration access --- models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models.py b/models.py index 5c211ad5..dc5d92e0 100644 --- a/models.py +++ b/models.py @@ -2124,4 +2124,4 @@ class CognitionIntegrationAccess(Base): ForeignKey(f"{Tablenames.ORGANIZATION.value}.id", ondelete="CASCADE"), index=True, ) - integration_type = Column(String) # of type enums.CognitionIntegrationType.*.value + integration_types = Column(ARRAY) # of type enums.CognitionIntegrationType.*.value From bbf643e53a210daf4aff6abead0ea8b5af362839 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 16 May 2025 15:54:02 +0200 Subject: [PATCH 009/114] perf: add integration_types to integration access --- models.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/models.py b/models.py index dc5d92e0..053373bf 100644 --- a/models.py +++ b/models.py @@ -2124,4 +2124,6 @@ class CognitionIntegrationAccess(Base): ForeignKey(f"{Tablenames.ORGANIZATION.value}.id", ondelete="CASCADE"), index=True, ) - integration_types = Column(ARRAY) # of type enums.CognitionIntegrationType.*.value + integration_types = Column( + ARRAY(String) + ) # of type enums.CognitionIntegrationType.*.value From cc57cb4d021809015d71dd4d31e23cd0869ab9dd Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 16 May 2025 16:03:40 +0200 Subject: [PATCH 010/114] perf: rename last_extraction to extract_history --- cognition_objects/integration.py | 17 ++++++++++++----- models.py | 2 +- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index 1a94277f..e8f4b9e4 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -47,7 +47,7 @@ def create( integration_type: str, integration_config: Dict, llm_config: Dict, - last_extraction: Optional[Dict] = {}, + extract_history: Optional[Dict] = {}, with_commit: bool = True, created_at: Optional[datetime] = None, id: Optional[str] = None, @@ -65,7 +65,7 @@ def create( type=integration_type, config=integration_config, llm_config=llm_config, - last_extraction=last_extraction, + extract_history=extract_history, ) general.add(integration, with_commit) @@ -80,7 +80,7 @@ def update( integration_config: Optional[int] = None, llm_config: Optional[Dict] = None, error_message: Optional[str] = None, - last_extraction: Optional[Dict] = None, + extract_history: Optional[Dict] = None, with_commit: bool = True, ) -> CognitionIntegration: integration: CognitionIntegration = get_by_id(id) @@ -99,8 +99,8 @@ def update( integration.llm_config = llm_config if error_message is not None: integration.error_message = error_message - if last_extraction is not None: - integration.last_extraction = last_extraction + if extract_history is not None: + integration.extract_history = extract_history general.add(integration, with_commit) return integration @@ -117,6 +117,13 @@ def execution_finished(id: str) -> bool: ) +def clear_history(id: str) -> None: + integration: CognitionIntegration = get_by_id(id) + integration.extract_history = {} + integration.state = CognitionMarkdownFileState.QUEUE.value + general.add(integration, True) + + def delete(id: str, with_commit: bool = True) -> None: session.query(CognitionIntegration).filter(CognitionIntegration.id == id).delete() general.flush_or_commit(with_commit) diff --git a/models.py b/models.py index 053373bf..6dab21db 100644 --- a/models.py +++ b/models.py @@ -2102,7 +2102,7 @@ class CognitionIntegration(Base): llm_config = Column(JSON) error_message = Column(String) - last_extraction = Column(JSON) + extract_history = Column(JSON) # Information relevant for "delta" extraction. Varies based on the integration type. # e.g. for github issue => last timestamp # e.g. for github file => file name + SHA From 2da4e15025d197a0e3175975b8de2af2edd44220 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 16 May 2025 16:54:36 +0200 Subject: [PATCH 011/114] fix: store enum.value instead of enum --- cognition_objects/integration.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index e8f4b9e4..c49f2295 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -5,7 +5,10 @@ from ..business_objects import general from ..session import session from ..models import CognitionIntegration -from ..enums import CognitionMarkdownFileState +from ..enums import ( + CognitionMarkdownFileState, + CognitionIntegrationType, +) def get_by_id(id: str) -> CognitionIntegration: @@ -44,7 +47,7 @@ def create( name: str, description: str, state: str, - integration_type: str, + integration_type: CognitionIntegrationType, integration_config: Dict, llm_config: Dict, extract_history: Optional[Dict] = {}, @@ -62,7 +65,7 @@ def create( name=name, description=description, state=state, - type=integration_type, + type=integration_type.value, config=integration_config, llm_config=llm_config, extract_history=extract_history, From edad963124969c8dd3103f4be4b9a5080d7b690a Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 16 May 2025 16:54:49 +0200 Subject: [PATCH 012/114] fix: integration.project_id nullable --- models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models.py b/models.py index 6dab21db..6bc23d2c 100644 --- a/models.py +++ b/models.py @@ -2078,7 +2078,7 @@ class CognitionIntegration(Base): id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) project_id = Column( UUID(as_uuid=True), - ForeignKey(f"{Tablenames.PROJECT.value}.id", ondelete="CASCADE"), + ForeignKey(f"{Tablenames.PROJECT.value}.id", nullable=True, ondelete="CASCADE"), index=True, ) created_by = Column( From 4e52cb86c53e1c27d81c251343f27c14438b4cf1 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 16 May 2025 16:58:13 +0200 Subject: [PATCH 013/114] fix: nulable column instead of foreignkey --- models.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/models.py b/models.py index 6bc23d2c..9c8e5766 100644 --- a/models.py +++ b/models.py @@ -2078,8 +2078,9 @@ class CognitionIntegration(Base): id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) project_id = Column( UUID(as_uuid=True), - ForeignKey(f"{Tablenames.PROJECT.value}.id", nullable=True, ondelete="CASCADE"), + ForeignKey(f"{Tablenames.PROJECT.value}.id", ondelete="CASCADE"), index=True, + nullable=True, ) created_by = Column( UUID(as_uuid=True), From 03842fda4fe9188fc22cb2fa0bd18405e5b55281 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 16 May 2025 22:11:34 +0200 Subject: [PATCH 014/114] fix: enum values --- cognition_objects/integration.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index c49f2295..f08f9586 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -93,9 +93,7 @@ def update( if description is not None: integration.description = description if state is not None: - if state not in CognitionMarkdownFileState.all(): - raise HTTPException(status_code=400, detail=f"Invalid state: {state}") - integration.state = state + integration.state = state.value if integration_config is not None: integration.config = integration_config if llm_config is not None: From 0386f6cb777eea730581a76e4382d510097a6b16 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 16 May 2025 22:13:40 +0200 Subject: [PATCH 015/114] perf: task cancellation --- business_objects/monitor.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/business_objects/monitor.py b/business_objects/monitor.py index 6baaf5f4..5faaf353 100644 --- a/business_objects/monitor.py +++ b/business_objects/monitor.py @@ -9,6 +9,7 @@ markdown_file as markdown_file_db_bo, file_extraction as file_extraction_db_bo, file_transformation as file_transformation_db_bo, + integration as integration_db_bo, ) FILE_CACHING_IN_PROGRESS_STATES = [ @@ -197,6 +198,16 @@ def set_parse_cognition_file_task_to_failed( general.commit() +def set_integration_task_to_failed( + integration_id: str, + with_commit: bool = False, +) -> None: + integration = integration_db_bo.get_by_id(integration_id) + if integration: + integration.state = enums.CognitionMarkdownFileState.FAILED.value + general.flush_or_commit(with_commit) + + def __select_running_information_source_payloads( project_id: Optional[str] = None, only_running: bool = False, From 90debd4b17cb497b3af08c17efef4181b1f4831f Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Mon, 19 May 2025 16:52:01 +0200 Subject: [PATCH 016/114] fix: keyword arguments --- cognition_objects/integration.py | 2 +- cognition_objects/integration_access.py | 2 +- enums.py | 13 +++---------- 3 files changed, 5 insertions(+), 12 deletions(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index f08f9586..d6f1d7cd 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -42,7 +42,6 @@ def get_all_by_project_id(project_id: str) -> List[CognitionIntegration]: def create( - project_id: str, user_id: str, name: str, description: str, @@ -54,6 +53,7 @@ def create( with_commit: bool = True, created_at: Optional[datetime] = None, id: Optional[str] = None, + project_id: Optional[str] = None, ) -> CognitionIntegration: if state not in CognitionMarkdownFileState.all(): raise HTTPException(status_code=400, detail=f"Invalid state: {state}") diff --git a/cognition_objects/integration_access.py b/cognition_objects/integration_access.py index ed247c67..6a037376 100644 --- a/cognition_objects/integration_access.py +++ b/cognition_objects/integration_access.py @@ -45,7 +45,7 @@ def create( created_at: Optional[datetime] = None, ) -> CognitionIntegrationAccess: integration_access: CognitionIntegrationAccess = CognitionIntegrationAccess( - org_id=org_id, + organization_id=org_id, created_by=user_id, created_at=created_at, integration_types=integration_types, diff --git a/enums.py b/enums.py index 39233717..03e3b146 100644 --- a/enums.py +++ b/enums.py @@ -680,16 +680,9 @@ class CognitionMarkdownFileState(Enum): FINISHED = "FINISHED" FAILED = "FAILED" - def all(): - return [ - CognitionMarkdownFileState.QUEUE.value, - CognitionMarkdownFileState.EXTRACTING.value, - CognitionMarkdownFileState.TOKENIZING.value, - CognitionMarkdownFileState.SPLITTING.value, - CognitionMarkdownFileState.TRANSFORMING.value, - CognitionMarkdownFileState.FINISHED.value, - CognitionMarkdownFileState.FAILED.value, - ] + @classmethod + def all(cls): + return [e.value for e in cls] class CognitionInterfaceType(Enum): From d09997482d92959d1bbc50a25dae7a3a201fda10 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Mon, 19 May 2025 23:50:15 +0200 Subject: [PATCH 017/114] perf: integration record --- enums.py | 1 + models.py | 33 +++++++++++++++++++++++++++++++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/enums.py b/enums.py index 03e3b146..4c8b9617 100644 --- a/enums.py +++ b/enums.py @@ -157,6 +157,7 @@ class Tablenames(Enum): FULL_ADMIN_ACCESS = "full_admin_access" INTEGRATION = "integration" INTEGRATION_ACCESS = "integration_access" + INTEGRATION_RECORD = "integration_record" def snake_case_to_pascal_case(self): # the type name (written in PascalCase) of a table is needed to create backrefs diff --git a/models.py b/models.py index 9c8e5766..819365b2 100644 --- a/models.py +++ b/models.py @@ -2085,7 +2085,7 @@ class CognitionIntegration(Base): created_by = Column( UUID(as_uuid=True), ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET NULL"), - index=True, + index=False, ) created_at = Column(DateTime, default=sql.func.now()) name = Column(String) @@ -2117,7 +2117,7 @@ class CognitionIntegrationAccess(Base): created_by = Column( UUID(as_uuid=True), ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET NULL"), - index=True, + index=False, ) created_at = Column(DateTime, default=sql.func.now()) organization_id = Column( @@ -2128,3 +2128,32 @@ class CognitionIntegrationAccess(Base): integration_types = Column( ARRAY(String) ) # of type enums.CognitionIntegrationType.*.value + + +class CognitionIntegrationRecord(Base): + __tablename__ = Tablenames.INTEGRATION_RECORD.value + __table_args__ = ( + UniqueConstraint( + "integration_id", + "record_id", + name="unique_record", + ), + {"schema": "cognition"}, + ) + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + created_at = Column(DateTime, default=sql.func.now()) + created_by = Column( + UUID(as_uuid=True), + ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET_NULL"), + index=False, + ) + integration_id = Column( + UUID(as_uuid=True), + ForeignKey(f"cognition.{Tablenames.INTEGRATION.value}.id", ondelete="SET_NULL"), + index=True, + ) + record_id = Column( + UUID(as_uuid=True), + ForeignKey(f"cognition.{Tablenames.RECORD.value}.id", ondelete="CASCADE"), + index=True, + ) From 6fd36565c03d8133b0a7d308656214966a3b0fd5 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 20 May 2025 11:32:52 +0200 Subject: [PATCH 018/114] perf: add tokenizer --- cognition_objects/integration_access.py | 11 +++++++++ models.py | 30 +------------------------ 2 files changed, 12 insertions(+), 29 deletions(-) diff --git a/cognition_objects/integration_access.py b/cognition_objects/integration_access.py index 6a037376..dbd81a33 100644 --- a/cognition_objects/integration_access.py +++ b/cognition_objects/integration_access.py @@ -55,6 +55,17 @@ def create( return integration_access +def update( + id: str, + integration_types: List[CognitionIntegrationType], + with_commit: bool = True, +) -> CognitionIntegrationAccess: + integration_access = get_by_id(id) + integration_access.integration_types = integration_types + general.add(integration_access, with_commit) + return integration_access + + def delete(id: str, with_commit: bool = True) -> None: session.query(CognitionIntegrationAccess).filter( CognitionIntegrationAccess.id == id diff --git a/models.py b/models.py index 819365b2..54095d10 100644 --- a/models.py +++ b/models.py @@ -2090,6 +2090,7 @@ class CognitionIntegration(Base): created_at = Column(DateTime, default=sql.func.now()) name = Column(String) description = Column(String) + tokenizer = Column(String) state = Column(String) # of type enums.CognitionMarkdownFileState.*.value type = Column(String) # of type enums.CognitionIntegrationType.*.value config = Column(JSON) @@ -2128,32 +2129,3 @@ class CognitionIntegrationAccess(Base): integration_types = Column( ARRAY(String) ) # of type enums.CognitionIntegrationType.*.value - - -class CognitionIntegrationRecord(Base): - __tablename__ = Tablenames.INTEGRATION_RECORD.value - __table_args__ = ( - UniqueConstraint( - "integration_id", - "record_id", - name="unique_record", - ), - {"schema": "cognition"}, - ) - id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) - created_at = Column(DateTime, default=sql.func.now()) - created_by = Column( - UUID(as_uuid=True), - ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET_NULL"), - index=False, - ) - integration_id = Column( - UUID(as_uuid=True), - ForeignKey(f"cognition.{Tablenames.INTEGRATION.value}.id", ondelete="SET_NULL"), - index=True, - ) - record_id = Column( - UUID(as_uuid=True), - ForeignKey(f"cognition.{Tablenames.RECORD.value}.id", ondelete="CASCADE"), - index=True, - ) From 76fd2ff06d153d545957d717ddda5a85a73423b4 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 20 May 2025 11:55:05 +0200 Subject: [PATCH 019/114] perf: add update integration access --- cognition_objects/integration_access.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cognition_objects/integration_access.py b/cognition_objects/integration_access.py index dbd81a33..d3b857e9 100644 --- a/cognition_objects/integration_access.py +++ b/cognition_objects/integration_access.py @@ -40,7 +40,7 @@ def get( def create( org_id: str, user_id: str, - integration_types: List[CognitionIntegrationType], + integration_types: List[str], with_commit: bool = True, created_at: Optional[datetime] = None, ) -> CognitionIntegrationAccess: @@ -57,10 +57,12 @@ def create( def update( id: str, + org_id: str, integration_types: List[CognitionIntegrationType], with_commit: bool = True, ) -> CognitionIntegrationAccess: integration_access = get_by_id(id) + integration_access.organization_id = org_id integration_access.integration_types = integration_types general.add(integration_access, with_commit) return integration_access From 23099d3db100c01b50d5be8b7934cd97d8e78091 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 20 May 2025 12:15:21 +0200 Subject: [PATCH 020/114] perf: update integration endpoints --- cognition_objects/integration.py | 5 +++++ cognition_objects/integration_access.py | 10 ++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index d6f1d7cd..b30b6102 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -45,6 +45,7 @@ def create( user_id: str, name: str, description: str, + tokenizer: str, state: str, integration_type: CognitionIntegrationType, integration_config: Dict, @@ -64,6 +65,7 @@ def create( created_at=created_at, name=name, description=description, + tokenizer=tokenizer, state=state, type=integration_type.value, config=integration_config, @@ -79,6 +81,7 @@ def update( id: str, name: Optional[str] = None, description: Optional[str] = None, + tokenizer: Optional[str] = None, state: Optional[CognitionMarkdownFileState] = None, integration_config: Optional[int] = None, llm_config: Optional[Dict] = None, @@ -92,6 +95,8 @@ def update( integration.name = name if description is not None: integration.description = description + if tokenizer is not None: + integration.tokenizer = tokenizer if state is not None: integration.state = state.value if integration_config is not None: diff --git a/cognition_objects/integration_access.py b/cognition_objects/integration_access.py index d3b857e9..522239cf 100644 --- a/cognition_objects/integration_access.py +++ b/cognition_objects/integration_access.py @@ -57,13 +57,15 @@ def create( def update( id: str, - org_id: str, - integration_types: List[CognitionIntegrationType], + org_id: Optional[str] = None, + integration_types: Optional[List[str]] = None, with_commit: bool = True, ) -> CognitionIntegrationAccess: integration_access = get_by_id(id) - integration_access.organization_id = org_id - integration_access.integration_types = integration_types + if org_id: + integration_access.organization_id = org_id + if integration_types: + integration_access.integration_types = integration_types general.add(integration_access, with_commit) return integration_access From 5d2d503ca03ee76c5e2f3500dca9cb317626da1a Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 20 May 2025 15:23:37 +0200 Subject: [PATCH 021/114] perf: add get endpoint --- cognition_objects/integration.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index b30b6102..f4d1941c 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -4,7 +4,7 @@ from ..business_objects import general from ..session import session -from ..models import CognitionIntegration +from ..models import CognitionIntegration, Project from ..enums import ( CognitionMarkdownFileState, CognitionIntegrationType, @@ -19,15 +19,17 @@ def get_by_id(id: str) -> CognitionIntegration: ) -def get(project_id: str, name: str) -> CognitionIntegration: - return ( +def get( + org_id: str, integration_type: Optional[str] = None +) -> List[CognitionIntegration]: + query = ( session.query(CognitionIntegration) - .filter( - CognitionIntegration.project_id == project_id, - CognitionIntegration.name == name, - ) - .first() + .join(Project, CognitionIntegration.project_id == Project.id) + .filter(Project.organization_id == org_id) ) + if integration_type: + query = query.filter(CognitionIntegration.type == integration_type) + return query.order_by(CognitionIntegration.created_at).all() def get_all_by_project_id(project_id: str) -> List[CognitionIntegration]: From a2aa966970732c0b43b81398d9abde7b14848625 Mon Sep 17 00:00:00 2001 From: Lina Date: Wed, 21 May 2025 12:16:11 +0200 Subject: [PATCH 022/114] Oidc field in the users table --- models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/models.py b/models.py index 1dae54d2..7da43556 100644 --- a/models.py +++ b/models.py @@ -226,6 +226,7 @@ class User(Base): created_at = Column(DateTime, default=sql.func.now()) metadata_public = Column(JSON) sso_provider = Column(String) + oidc_identifier = Column(String) class Team(Base): From 4cd321f21b83c8fa10728fd0b296f6b65a484059 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Mon, 26 May 2025 10:15:00 +0200 Subject: [PATCH 023/114] perf: add org_id to integration provider --- cognition_objects/integration.py | 19 +++++++++++++------ models.py | 5 +++++ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index f4d1941c..98c1656f 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -22,10 +22,8 @@ def get_by_id(id: str) -> CognitionIntegration: def get( org_id: str, integration_type: Optional[str] = None ) -> List[CognitionIntegration]: - query = ( - session.query(CognitionIntegration) - .join(Project, CognitionIntegration.project_id == Project.id) - .filter(Project.organization_id == org_id) + query = session.query(CognitionIntegration).filter( + CognitionIntegration.organization_id == org_id ) if integration_type: query = query.filter(CognitionIntegration.type == integration_type) @@ -132,6 +130,15 @@ def clear_history(id: str) -> None: general.add(integration, True) -def delete(id: str, with_commit: bool = True) -> None: - session.query(CognitionIntegration).filter(CognitionIntegration.id == id).delete() +def delete_many( + ids: List[str], delete_refinery_projects: bool = False, with_commit: bool = True +) -> None: + integrations = session.query(CognitionIntegration).filter( + CognitionIntegration.id.in_(ids) + ) + if delete_refinery_projects: + session.query(Project).filter( + Project.id.in_(filter(None, [i.project_id for i in integrations])) + ).delete(synchronize_session=False) + integrations.delete(synchronize_session=False) general.flush_or_commit(with_commit) diff --git a/models.py b/models.py index 54095d10..f9cf37ed 100644 --- a/models.py +++ b/models.py @@ -2076,6 +2076,11 @@ class CognitionIntegration(Base): __tablename__ = Tablenames.INTEGRATION.value __table_args__ = {"schema": "cognition"} id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + organization_id = Column( + UUID(as_uuid=True), + ForeignKey(f"{Tablenames.ORGANIZATION.value}.id", ondelete="CASCADE"), + index=True, + ) project_id = Column( UUID(as_uuid=True), ForeignKey(f"{Tablenames.PROJECT.value}.id", ondelete="CASCADE"), From 0467f294a3fccd68ed3b6a823eb0f9e6c0a79ce5 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Mon, 26 May 2025 12:07:27 +0200 Subject: [PATCH 024/114] perf: add org_id support to integration --- cognition_objects/integration.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index 98c1656f..63e2ba3c 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -42,6 +42,7 @@ def get_all_by_project_id(project_id: str) -> List[CognitionIntegration]: def create( + org_id: str, user_id: str, name: str, description: str, @@ -60,6 +61,7 @@ def create( raise HTTPException(status_code=400, detail=f"Invalid state: {state}") integration: CognitionIntegration = CognitionIntegration( id=id, + organization_id=org_id, project_id=project_id, created_by=user_id, created_at=created_at, From 888a5427fae6cb7d20dbcaac7d3f22f40b37fc51 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Mon, 26 May 2025 16:25:32 +0200 Subject: [PATCH 025/114] perf: add record delta criteria --- business_objects/record.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/business_objects/record.py b/business_objects/record.py index 03412437..7fd02bdc 100644 --- a/business_objects/record.py +++ b/business_objects/record.py @@ -609,7 +609,7 @@ def count_missing_tokenized_records(project_id: str) -> int: query = f""" SELECT COUNT(*) FROM ( - {get_records_without_tokenization(project_id, None, query_only = True)} + {get_records_without_tokenization(project_id, None, query_only=True)} ) record_query """ return general.execute_first(query)[0] @@ -913,3 +913,14 @@ def get_first_no_text_column(project_id: str, record_id: str) -> str: WHERE r.project_id = '{project_id}' AND r.id = '{record_id}' """ return general.execute_first(query)[0] + + +def get_integration_delta_record(project_id: str, source: str) -> Record: + project_id = prevent_sql_injection(project_id, isinstance(project_id, str)) + source = prevent_sql_injection(source, isinstance(source, str)) + query = f""" + SELECT r.* + FROM record r + WHERE r.project_id = '{project_id}' AND r.data->>'source' = '{source}' + """ + return general.execute_first(query) From 8af9e394be1566eda08e5c861ce869a4da8708cf Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Mon, 26 May 2025 16:58:38 +0200 Subject: [PATCH 026/114] fix: task execution finish on failed integration --- cognition_objects/integration.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index 63e2ba3c..d6851eb7 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -119,7 +119,10 @@ def execution_finished(id: str) -> bool: session.query(CognitionIntegration) .filter( CognitionIntegration.id == id, - CognitionIntegration.state == CognitionMarkdownFileState.FINISHED.value, + CognitionIntegration.state.in_( + CognitionMarkdownFileState.FINISHED.value, + CognitionMarkdownFileState.FAILED.value, + ), ) .first() ) From 99494f8bbf8dff860e821f29c4f302fb1593b421 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Mon, 26 May 2025 17:12:53 +0200 Subject: [PATCH 027/114] perf: add integration finished_at --- cognition_objects/integration.py | 8 +++++--- models.py | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index d6851eb7..f5e4d363 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -54,6 +54,7 @@ def create( extract_history: Optional[Dict] = {}, with_commit: bool = True, created_at: Optional[datetime] = None, + finished_at: Optional[datetime] = None, id: Optional[str] = None, project_id: Optional[str] = None, ) -> CognitionIntegration: @@ -65,6 +66,7 @@ def create( project_id=project_id, created_by=user_id, created_at=created_at, + finished_at=finished_at, name=name, description=description, tokenizer=tokenizer, @@ -88,7 +90,7 @@ def update( integration_config: Optional[int] = None, llm_config: Optional[Dict] = None, error_message: Optional[str] = None, - extract_history: Optional[Dict] = None, + finished_at: Optional[datetime] = None, with_commit: bool = True, ) -> CognitionIntegration: integration: CognitionIntegration = get_by_id(id) @@ -107,8 +109,8 @@ def update( integration.llm_config = llm_config if error_message is not None: integration.error_message = error_message - if extract_history is not None: - integration.extract_history = extract_history + if finished_at is not None: + integration.finished_at = finished_at general.add(integration, with_commit) return integration diff --git a/models.py b/models.py index 2fe40ccb..a9ec83c2 100644 --- a/models.py +++ b/models.py @@ -2100,6 +2100,7 @@ class CognitionIntegration(Base): index=False, ) created_at = Column(DateTime, default=sql.func.now()) + finished_at = Column(DateTime) name = Column(String) description = Column(String) tokenizer = Column(String) From 5989801037d1f8778c1136965f461ea2693b13cc Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Mon, 26 May 2025 17:18:04 +0200 Subject: [PATCH 028/114] perf: add started_at --- cognition_objects/integration.py | 3 +++ models.py | 1 + 2 files changed, 4 insertions(+) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index f5e4d363..09a5ce75 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -90,6 +90,7 @@ def update( integration_config: Optional[int] = None, llm_config: Optional[Dict] = None, error_message: Optional[str] = None, + started_at: Optional[datetime] = None, finished_at: Optional[datetime] = None, with_commit: bool = True, ) -> CognitionIntegration: @@ -109,6 +110,8 @@ def update( integration.llm_config = llm_config if error_message is not None: integration.error_message = error_message + if started_at is not None: + integration.finished_at = started_at if finished_at is not None: integration.finished_at = finished_at diff --git a/models.py b/models.py index a9ec83c2..c479c920 100644 --- a/models.py +++ b/models.py @@ -2100,6 +2100,7 @@ class CognitionIntegration(Base): index=False, ) created_at = Column(DateTime, default=sql.func.now()) + started_at = Column(DateTime) finished_at = Column(DateTime) name = Column(String) description = Column(String) From bec5f20eb5593593273d354af5e95e71200c5aef Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Mon, 26 May 2025 17:24:25 +0200 Subject: [PATCH 029/114] fix: started_at - finished_at syntax error --- cognition_objects/integration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index 09a5ce75..86528b0a 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -111,7 +111,7 @@ def update( if error_message is not None: integration.error_message = error_message if started_at is not None: - integration.finished_at = started_at + integration.started_at = started_at if finished_at is not None: integration.finished_at = finished_at From 8898a3cb6650f2087d7cb368e5615e6b12745ca6 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 27 May 2025 10:12:06 +0200 Subject: [PATCH 030/114] perf: add integration records --- enums.py | 6 +- integration_objects/__init__.py | 135 ++++++++++++++++++++++++++++ integration_objects/github_file.py | 89 ++++++++++++++++++ integration_objects/github_issue.py | 101 +++++++++++++++++++++ integration_objects/pdf.py | 95 ++++++++++++++++++++ models.py | 103 +++++++++++++++++++-- 6 files changed, 523 insertions(+), 6 deletions(-) create mode 100644 integration_objects/__init__.py create mode 100644 integration_objects/github_file.py create mode 100644 integration_objects/github_issue.py create mode 100644 integration_objects/pdf.py diff --git a/enums.py b/enums.py index 4c8b9617..a1c9897c 100644 --- a/enums.py +++ b/enums.py @@ -157,7 +157,11 @@ class Tablenames(Enum): FULL_ADMIN_ACCESS = "full_admin_access" INTEGRATION = "integration" INTEGRATION_ACCESS = "integration_access" - INTEGRATION_RECORD = "integration_record" + + # Individial integrations + INTEGRATION_GITHUB_FILE = "github_file" + INTEGRATION_GITHUB_ISSUE = "github_issue" + INTEGRATION_PDF = "pdf" def snake_case_to_pascal_case(self): # the type name (written in PascalCase) of a table is needed to create backrefs diff --git a/integration_objects/__init__.py b/integration_objects/__init__.py new file mode 100644 index 00000000..2b5190a0 --- /dev/null +++ b/integration_objects/__init__.py @@ -0,0 +1,135 @@ +from typing import List, Optional + +from datetime import datetime + +from ..business_objects import general +from ..cognition_objects import integration as integration_db_bo +from ..session import session + + +def get_by_id(IntegrationModel, id: str) -> object: + return session.query(IntegrationModel).filter(IntegrationModel.id == id).first() + + +def get_by_running_id(IntegrationModel, integration_id: str, running_id: int) -> object: + return ( + session.query(IntegrationModel) + .filter( + IntegrationModel.integration_id == integration_id, + IntegrationModel.running_id == running_id, + ) + .first() + ) + + +def get_all_by_integration_id(IntegrationModel, integration_id: str) -> List[object]: + return ( + session.query(IntegrationModel) + .filter(IntegrationModel.integration_id == integration_id) + .order_by(IntegrationModel.created_at) + .all() + ) + + +def get_all_by_project_id(IntegrationModel, project_id: str) -> List[object]: + integrations = integration_db_bo.get_all_by_project_id(project_id) + return ( + session.query(IntegrationModel) + .filter( + IntegrationModel.integration_id.in_([i.id for i in integrations]), + ) + .order_by(IntegrationModel.created_at.asc()) + .all() + ) + + +def create( + IntegrationModel, + created_by: str, + integration_id: str, + running_id: int, + source: str, + delta_criteria: str, + minio_file_name: str, + created_at: Optional[datetime] = None, + id: Optional[str] = None, + with_commit: bool = True, + **kwargs, +) -> object: + integration_record = IntegrationModel( + created_by=created_by, + integration_id=integration_id, + running_id=running_id, + source=source, + delta_criteria=delta_criteria, + minio_file_name=minio_file_name, + created_at=created_at, + id=id, + **kwargs, + ) + + general.add(integration_record, with_commit) + + return integration_record + + +def update( + IntegrationModel, + id: str, + updated_by: str, + running_id: Optional[int] = None, + source: Optional[str] = None, + delta_criteria: Optional[str] = None, + minio_file_name: Optional[str] = None, + updated_at: Optional[datetime] = None, + with_commit: bool = True, + **kwargs, +) -> object: + integration_record = get_by_id(IntegrationModel, id) + integration_record.updated_by = updated_by + + if running_id is not None: + integration_record.running_id = running_id + if source is not None: + integration_record.source = source + if delta_criteria is not None: + integration_record.delta_criteria = delta_criteria + if minio_file_name is not None: + integration_record.minio_file_name = minio_file_name + if updated_at is not None: + integration_record.updated_at = updated_at + + for key, value in kwargs.items(): + if not hasattr(integration_record, key): + raise ValueError(f"Invalid field '{key}' for {IntegrationModel.__name__}") + if value is not None: + setattr(integration_record, key, value) + + general.add(integration_record, with_commit) + + return integration_record + + +def delete_many(IntegrationModel, ids: List[str], with_commit: bool = False) -> None: + integration_records = session.query(IntegrationModel).filter( + IntegrationModel.id.in_(ids) + ) + integration_records.delete(synchronize_session=False) + general.flush_or_commit(with_commit) + + +def clear_history(IntegrationModel, id: str, with_commit: bool = False) -> None: + integration_record = get_by_id(IntegrationModel, id) + integration_record.delta_criteria = None + general.add(integration_record, with_commit) + + +__all__ = [ + "create", + "update", + "delete_many", + "get_by_id", + "get_by_running_id", + "get_all_by_integration_id", + "get_all_by_project_id", +] diff --git a/integration_objects/github_file.py b/integration_objects/github_file.py new file mode 100644 index 00000000..0f527279 --- /dev/null +++ b/integration_objects/github_file.py @@ -0,0 +1,89 @@ +from typing import List, Optional +from datetime import datetime + +from ..models import IntegrationGithubFile +from .. import integration_objects + + +def get_by_id(id: str) -> IntegrationGithubFile: + return integration_objects.get_by_id(IntegrationGithubFile, id) + + +def get_by_running_id(integration_id: str, running_id: int) -> IntegrationGithubFile: + return integration_objects.get_by_running_id( + IntegrationGithubFile, integration_id, running_id + ) + + +def get_all_by_integration_id(integration_id: str) -> List[IntegrationGithubFile]: + return integration_objects.get_all_by_integration_id( + IntegrationGithubFile, integration_id + ) + + +def get_all_by_project_id(project_id: str) -> List[IntegrationGithubFile]: + return integration_objects.get_all_by_project_id(IntegrationGithubFile, project_id) + + +def create( + created_by: str, + integration_id: str, + running_id: int, + source: str, + path: str, + sha: str, + delta_criteria: str, + minio_file_name: str, + created_at: Optional[datetime] = None, + id: Optional[str] = None, + with_commit: bool = True, +) -> IntegrationGithubFile: + return integration_objects.create( + IntegrationGithubFile, + created_by=created_by, + integration_id=integration_id, + running_id=running_id, + source=source, + path=path, + sha=sha, + delta_criteria=delta_criteria, + minio_file_name=minio_file_name, + created_at=created_at, + id=id, + with_commit=with_commit, + ) + + +def update( + id: str, + updated_by: str, + running_id: Optional[int] = None, + source: Optional[str] = None, + path: Optional[str] = None, + sha: Optional[str] = None, + delta_criteria: Optional[str] = None, + minio_file_name: Optional[str] = None, + updated_at: Optional[datetime] = None, + with_commit: bool = True, +) -> IntegrationGithubFile: + return integration_objects.update( + IntegrationGithubFile, + id=id, + updated_by=updated_by, + running_id=running_id, + source=source, + path=path, + sha=sha, + delta_criteria=delta_criteria, + minio_file_name=minio_file_name, + updated_at=updated_at, + with_commit=with_commit, + ) + + +def clear_history(id: str, with_commit: bool = True) -> None: + integration_objects.clear_history(IntegrationGithubFile, id, with_commit) + + +def delete_many(ids: List[str], with_commit: bool = True) -> None: + integration_objects.delete_many(IntegrationGithubFile, ids, with_commit) diff --git a/integration_objects/github_issue.py b/integration_objects/github_issue.py new file mode 100644 index 00000000..0b3c5b9f --- /dev/null +++ b/integration_objects/github_issue.py @@ -0,0 +1,101 @@ +from typing import List, Optional +from datetime import datetime + +from ..models import IntegrationGithubIssue +from .. import integration_objects + + +def get_by_id(id: str) -> IntegrationGithubIssue: + return integration_objects.get_by_id(IntegrationGithubIssue, id) + + +def get_by_running_id(integration_id: str, running_id: int) -> IntegrationGithubIssue: + return integration_objects.get_by_running_id( + IntegrationGithubIssue, integration_id, running_id + ) + + +def get_all_by_integration_id(integration_id: str) -> List[IntegrationGithubIssue]: + return integration_objects.get_all_by_integration_id( + IntegrationGithubIssue, integration_id + ) + + +def get_all_by_project_id(project_id: str) -> List[IntegrationGithubIssue]: + return integration_objects.get_all_by_project_id(IntegrationGithubIssue, project_id) + + +def create( + created_by: str, + integration_id: str, + running_id: int, + source: str, + url: str, + state: str, + number: str, + delta_criteria: str, + minio_file_name: str, + milestone: Optional[str] = None, + assignee: Optional[str] = None, + created_at: Optional[datetime] = None, + id: Optional[str] = None, + with_commit: bool = True, +) -> IntegrationGithubIssue: + return integration_objects.create( + IntegrationGithubIssue, + created_by=created_by, + integration_id=integration_id, + running_id=running_id, + source=source, + url=url, + state=state, + number=number, + milestone=milestone, + assignee=assignee, + delta_criteria=delta_criteria, + minio_file_name=minio_file_name, + created_at=created_at, + id=id, + with_commit=with_commit, + ) + + +def update( + id: str, + updated_by: str, + running_id: Optional[int] = None, + source: Optional[str] = None, + url: Optional[str] = None, + state: Optional[str] = None, + number: Optional[str] = None, + milestone: Optional[str] = None, + assignee: Optional[str] = None, + delta_criteria: Optional[str] = None, + minio_file_name: Optional[str] = None, + updated_at: Optional[datetime] = None, + with_commit: bool = True, +) -> IntegrationGithubIssue: + return integration_objects.update( + IntegrationGithubIssue, + id=id, + updated_by=updated_by, + running_id=running_id, + source=source, + url=url, + state=state, + number=number, + milestone=milestone, + assignee=assignee, + delta_criteria=delta_criteria, + minio_file_name=minio_file_name, + updated_at=updated_at, + with_commit=with_commit, + ) + + +def clear_history(id: str, with_commit: bool = True) -> None: + integration_objects.clear_history(IntegrationGithubIssue, id, with_commit) + + +def delete_many(ids: List[str], with_commit: bool = True) -> None: + integration_objects.delete_many(IntegrationGithubIssue, ids, with_commit) diff --git a/integration_objects/pdf.py b/integration_objects/pdf.py new file mode 100644 index 00000000..3d82a859 --- /dev/null +++ b/integration_objects/pdf.py @@ -0,0 +1,95 @@ +from typing import List, Optional +from datetime import datetime + +from ..models import IntegrationPdf +from .. import integration_objects + + +def get_by_id(id: str) -> IntegrationPdf: + return integration_objects.get_by_id(IntegrationPdf, id) + + +def get_by_running_id(integration_id: str, running_id: int) -> IntegrationPdf: + return integration_objects.get_by_running_id( + IntegrationPdf, integration_id, running_id + ) + + +def get_all_by_integration_id(integration_id: str) -> List[IntegrationPdf]: + return integration_objects.get_all_by_integration_id(IntegrationPdf, integration_id) + + +def get_all_by_project_id(project_id: str) -> List[IntegrationPdf]: + return integration_objects.get_all_by_project_id(IntegrationPdf, project_id) + + +def create( + created_by: str, + integration_id: str, + running_id: int, + source: str, + file_path: str, + page: int, + total_pages: int, + title: str, + delta_criteria: str, + minio_file_name: str, + created_at: Optional[datetime] = None, + id: Optional[str] = None, + with_commit: bool = True, +) -> IntegrationPdf: + return integration_objects.create( + IntegrationPdf, + created_by=created_by, + integration_id=integration_id, + running_id=running_id, + source=source, + file_path=file_path, + page=page, + total_pages=total_pages, + title=title, + delta_criteria=delta_criteria, + minio_file_name=minio_file_name, + created_at=created_at, + id=id, + with_commit=with_commit, + ) + + +def update( + id: str, + updated_by: str, + running_id: Optional[int] = None, + source: Optional[str] = None, + file_path: Optional[str] = None, + page: Optional[int] = None, + total_pages: Optional[int] = None, + title: Optional[str] = None, + delta_criteria: Optional[str] = None, + minio_file_name: Optional[str] = None, + updated_at: Optional[datetime] = None, + with_commit: bool = True, +) -> IntegrationPdf: + return integration_objects.update( + IntegrationPdf, + id=id, + updated_by=updated_by, + running_id=running_id, + source=source, + file_path=file_path, + page=page, + total_pages=total_pages, + title=title, + delta_criteria=delta_criteria, + minio_file_name=minio_file_name, + updated_at=updated_at, + with_commit=with_commit, + ) + + +def clear_history(id: str, with_commit: bool = True) -> None: + integration_objects.clear_history(IntegrationPdf, id, with_commit) + + +def delete_many(ids: List[str], with_commit: bool = True) -> None: + integration_objects.delete_many(IntegrationPdf, ids, with_commit) diff --git a/models.py b/models.py index c479c920..eb091a17 100644 --- a/models.py +++ b/models.py @@ -2118,11 +2118,6 @@ class CognitionIntegration(Base): llm_config = Column(JSON) error_message = Column(String) - extract_history = Column(JSON) - # Information relevant for "delta" extraction. Varies based on the integration type. - # e.g. for github issue => last timestamp - # e.g. for github file => file name + SHA - # e.g. for PDF => file name + page number class CognitionIntegrationAccess(Base): @@ -2143,3 +2138,101 @@ class CognitionIntegrationAccess(Base): integration_types = Column( ARRAY(String) ) # of type enums.CognitionIntegrationType.*.value + + +class IntegrationGithubFile(Base): + __tablename__ = Tablenames.INTEGRATION_GITHUB_FILE.value + __table_args__ = {"schema": "integration"} + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + created_by = Column( + UUID(as_uuid=True), + ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET NULL"), + index=False, + ) + updated_by = Column( + UUID(as_uuid=True), + ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET NULL"), + index=False, + nullable=True, + ) + created_at = Column(DateTime, default=sql.func.now()) + updated_at = Column(DateTime, default=None, onupdate=sql.func.now()) + integration_id = Column( + UUID(as_uuid=True), + ForeignKey(f"cognition.{Tablenames.INTEGRATION.value}.id", ondelete="CASCADE"), + index=True, + ) + running_id = Column(Integer, index=True) + source = Column(String, index=True) + path = Column(String) + sha = Column(String) + + delta_criteria = Column(JSON) + minio_file_name = Column(String) + + +class IntegrationGithubIssue(Base): + __tablename__ = Tablenames.INTEGRATION_GITHUB_ISSUE.value + __table_args__ = {"schema": "integration"} + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + created_by = Column( + UUID(as_uuid=True), + ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET NULL"), + index=False, + ) + updated_by = Column( + UUID(as_uuid=True), + ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET NULL"), + index=False, + nullable=True, + ) + created_at = Column(DateTime, default=sql.func.now()) + updated_at = Column(DateTime, default=None, onupdate=sql.func.now()) + integration_id = Column( + UUID(as_uuid=True), + ForeignKey(f"cognition.{Tablenames.INTEGRATION.value}.id", ondelete="CASCADE"), + index=True, + ) + running_id = Column(Integer, index=True) + source = Column(String, index=True) + url = Column(String) + state = Column(String) + assignee = Column(String) + milestone = Column(String) + number = Column(Integer) + + delta_criteria = Column(JSON) + minio_file_name = Column(String) + + +class IntegrationPdf(Base): + __tablename__ = Tablenames.INTEGRATION_PDF.value + __table_args__ = {"schema": "integration"} + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + created_by = Column( + UUID(as_uuid=True), + ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET NULL"), + index=False, + ) + updated_by = Column( + UUID(as_uuid=True), + ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET NULL"), + index=False, + nullable=True, + ) + created_at = Column(DateTime, default=sql.func.now()) + updated_at = Column(DateTime, default=None, onupdate=sql.func.now()) + integration_id = Column( + UUID(as_uuid=True), + ForeignKey(f"cognition.{Tablenames.INTEGRATION.value}.id", ondelete="CASCADE"), + index=True, + ) + running_id = Column(Integer, index=True) + source = Column(String, index=True) + file_path = Column(String) + page = Column(Integer) + total_pages = Column(Integer) + title = Column(String) + + delta_criteria = Column(JSON) + minio_file_name = Column(String) From 66e8a0cae2a52206c6e281ce1895342ec2c80ceb Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 27 May 2025 12:14:45 +0200 Subject: [PATCH 031/114] perf: add integration tables --- cognition_objects/integration.py | 21 +++++++-- enums.py | 49 +++++++++++++++++++++ integration_objects/__init__.py | 33 +++++++-------- integration_objects/github_file.py | 54 +++++++++-------------- integration_objects/github_issue.py | 66 +++++++++-------------------- integration_objects/pdf.py | 64 ++++++++++------------------ 6 files changed, 143 insertions(+), 144 deletions(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index 86528b0a..c471034a 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -1,6 +1,7 @@ from typing import List, Optional, Dict from datetime import datetime from fastapi import HTTPException +from sqlalchemy import func from ..business_objects import general from ..session import session @@ -41,6 +42,18 @@ def get_all_by_project_id(project_id: str) -> List[CognitionIntegration]: ) +def count_org_integrations(org_id: str) -> int: + counts = ( + session.query(CognitionIntegration.type, func.count(CognitionIntegration.id)) + .filter( + CognitionIntegration.organization_id == org_id, + ) + .group_by(CognitionIntegration.type) + .all() + ) + return {cognition_type: count for cognition_type, count in counts} + + def create( org_id: str, user_id: str, @@ -51,7 +64,6 @@ def create( integration_type: CognitionIntegrationType, integration_config: Dict, llm_config: Dict, - extract_history: Optional[Dict] = {}, with_commit: bool = True, created_at: Optional[datetime] = None, finished_at: Optional[datetime] = None, @@ -74,7 +86,6 @@ def create( type=integration_type.value, config=integration_config, llm_config=llm_config, - extract_history=extract_history, ) general.add(integration, with_commit) @@ -125,8 +136,10 @@ def execution_finished(id: str) -> bool: .filter( CognitionIntegration.id == id, CognitionIntegration.state.in_( - CognitionMarkdownFileState.FINISHED.value, - CognitionMarkdownFileState.FAILED.value, + [ + CognitionMarkdownFileState.FINISHED.value, + CognitionMarkdownFileState.FAILED.value, + ] ), ) .first() diff --git a/enums.py b/enums.py index a1c9897c..96fd1dd3 100644 --- a/enums.py +++ b/enums.py @@ -905,3 +905,52 @@ def from_string(value: str): raise KeyError( f"Could not parse CognitionIntegrationType from string '{changed_value}'" ) + + +class IntegrationMetadata(Enum): + """ + Enum for controlling and documenting the dynamic metadata fields associated with different integration types. + + The `IntegrationMetadata` enum defines which metadata keys are expected and allowed for each integration type + (e.g., GITHUB_FILE, GITHUB_ISSUE, PDF). Each member contains a set of keys specific to that integration, while + the `__DEFAULT__` member defines a set of common metadata fields (`source`, `delta_criteria`, `minio_file_name`) + that are always included. + + During extraction (see the `extract` functions in the integration handlers), metadata is dynamically attached to + each document according to the rules defined here. This ensures that only the relevant and allowed metadata fields + are published to the database for each integration type. + + The enum provides utility methods: + - `from_string(value: str)`: Returns the union of default and integration-specific metadata keys for a given type. + - `from_table_name(table_name: str)`: Looks up metadata keys based on the integration's table name. + + This enum is used by the integration object logic (see `submodules/model/integration_objects/__init__.py`) to + validate and filter metadata before persisting it, ensuring consistency and preventing unwanted fields from being + stored in the database. + + Example: + IntegrationMetadata.from_string("PDF") + # returns: {"source", "delta_criteria", "minio_file_name", "file_path", "page", "total_pages", "title"} + """ + + __DEFAULT__ = {"source", "delta_criteria", "minio_file_name"} + + GITHUB_FILE = {"path", "sha", "code_language"} + GITHUB_ISSUE = {"url", "state", "number", "assignee", "milestone"} + PDF = {"file_path", "page", "total_pages", "title"} + + @staticmethod + def from_string(value: str): + default = IntegrationMetadata.__DEFAULT__.value + + try: + metadata_keys = IntegrationMetadata[value].value + except KeyError: + raise ValueError( + f"Could not parse IntegrationMetadata from string '{value}'" + ) + return default.union(metadata_keys) + + @staticmethod + def from_table_name(table_name: str): + raise IntegrationMetadata.from_string(table_name.upper()) diff --git a/integration_objects/__init__.py b/integration_objects/__init__.py index 2b5190a0..47f1fe99 100644 --- a/integration_objects/__init__.py +++ b/integration_objects/__init__.py @@ -5,6 +5,7 @@ from ..business_objects import general from ..cognition_objects import integration as integration_db_bo from ..session import session +from ..enums import IntegrationMetadata def get_by_id(IntegrationModel, id: str) -> object: @@ -48,21 +49,16 @@ def create( created_by: str, integration_id: str, running_id: int, - source: str, - delta_criteria: str, - minio_file_name: str, created_at: Optional[datetime] = None, id: Optional[str] = None, with_commit: bool = True, - **kwargs, + **metadata, ) -> object: + kwargs = __get_supported_metadata(IntegrationModel.__tablename__, **metadata) integration_record = IntegrationModel( created_by=created_by, integration_id=integration_id, running_id=running_id, - source=source, - delta_criteria=delta_criteria, - minio_file_name=minio_file_name, created_at=created_at, id=id, **kwargs, @@ -78,30 +74,24 @@ def update( id: str, updated_by: str, running_id: Optional[int] = None, - source: Optional[str] = None, - delta_criteria: Optional[str] = None, - minio_file_name: Optional[str] = None, updated_at: Optional[datetime] = None, with_commit: bool = True, - **kwargs, + **metadata, ) -> object: integration_record = get_by_id(IntegrationModel, id) integration_record.updated_by = updated_by if running_id is not None: integration_record.running_id = running_id - if source is not None: - integration_record.source = source - if delta_criteria is not None: - integration_record.delta_criteria = delta_criteria - if minio_file_name is not None: - integration_record.minio_file_name = minio_file_name if updated_at is not None: integration_record.updated_at = updated_at + kwargs = __get_supported_metadata(IntegrationModel.__tablename__, **metadata) for key, value in kwargs.items(): if not hasattr(integration_record, key): - raise ValueError(f"Invalid field '{key}' for {IntegrationModel.__name__}") + raise ValueError( + f"Invalid field '{key}' for {IntegrationModel.__tablename__}" + ) if value is not None: setattr(integration_record, key, value) @@ -124,6 +114,13 @@ def clear_history(IntegrationModel, id: str, with_commit: bool = False) -> None: general.add(integration_record, with_commit) +def __get_supported_metadata(table_name: str, **kwargs) -> None: + supported_keys = IntegrationMetadata.from_table_name(table_name) + return { + key: kwargs[key] for key in supported_keys.value.intersection(kwargs.keys()) + } + + __all__ = [ "create", "update", diff --git a/integration_objects/github_file.py b/integration_objects/github_file.py index 0f527279..7233b78c 100644 --- a/integration_objects/github_file.py +++ b/integration_objects/github_file.py @@ -4,53 +4,47 @@ from ..models import IntegrationGithubFile from .. import integration_objects +IntegrationModel = IntegrationGithubFile -def get_by_id(id: str) -> IntegrationGithubFile: - return integration_objects.get_by_id(IntegrationGithubFile, id) +def get_by_id(id: str) -> IntegrationModel: + return integration_objects.get_by_id(IntegrationModel, id) -def get_by_running_id(integration_id: str, running_id: int) -> IntegrationGithubFile: + +def get_by_running_id(integration_id: str, running_id: int) -> IntegrationModel: return integration_objects.get_by_running_id( - IntegrationGithubFile, integration_id, running_id + IntegrationModel, integration_id, running_id ) -def get_all_by_integration_id(integration_id: str) -> List[IntegrationGithubFile]: +def get_all_by_integration_id(integration_id: str) -> List[IntegrationModel]: return integration_objects.get_all_by_integration_id( - IntegrationGithubFile, integration_id + IntegrationModel, integration_id ) -def get_all_by_project_id(project_id: str) -> List[IntegrationGithubFile]: - return integration_objects.get_all_by_project_id(IntegrationGithubFile, project_id) +def get_all_by_project_id(project_id: str) -> List[IntegrationModel]: + return integration_objects.get_all_by_project_id(IntegrationModel, project_id) def create( created_by: str, integration_id: str, running_id: int, - source: str, - path: str, - sha: str, - delta_criteria: str, - minio_file_name: str, created_at: Optional[datetime] = None, id: Optional[str] = None, with_commit: bool = True, -) -> IntegrationGithubFile: + **metadata +) -> IntegrationModel: return integration_objects.create( - IntegrationGithubFile, + IntegrationModel, created_by=created_by, integration_id=integration_id, running_id=running_id, - source=source, - path=path, - sha=sha, - delta_criteria=delta_criteria, - minio_file_name=minio_file_name, created_at=created_at, id=id, with_commit=with_commit, + **metadata ) @@ -58,32 +52,24 @@ def update( id: str, updated_by: str, running_id: Optional[int] = None, - source: Optional[str] = None, - path: Optional[str] = None, - sha: Optional[str] = None, - delta_criteria: Optional[str] = None, - minio_file_name: Optional[str] = None, updated_at: Optional[datetime] = None, with_commit: bool = True, -) -> IntegrationGithubFile: + **metadata +) -> IntegrationModel: return integration_objects.update( - IntegrationGithubFile, + IntegrationModel, id=id, updated_by=updated_by, running_id=running_id, - source=source, - path=path, - sha=sha, - delta_criteria=delta_criteria, - minio_file_name=minio_file_name, updated_at=updated_at, with_commit=with_commit, + **metadata ) def clear_history(id: str, with_commit: bool = True) -> None: - integration_objects.clear_history(IntegrationGithubFile, id, with_commit) + integration_objects.clear_history(IntegrationModel, id, with_commit) def delete_many(ids: List[str], with_commit: bool = True) -> None: - integration_objects.delete_many(IntegrationGithubFile, ids, with_commit) + integration_objects.delete_many(IntegrationModel, ids, with_commit) diff --git a/integration_objects/github_issue.py b/integration_objects/github_issue.py index 0b3c5b9f..7fe0e588 100644 --- a/integration_objects/github_issue.py +++ b/integration_objects/github_issue.py @@ -4,59 +4,47 @@ from ..models import IntegrationGithubIssue from .. import integration_objects +IntegrationModel = IntegrationGithubIssue -def get_by_id(id: str) -> IntegrationGithubIssue: - return integration_objects.get_by_id(IntegrationGithubIssue, id) +def get_by_id(id: str) -> IntegrationModel: + return integration_objects.get_by_id(IntegrationModel, id) -def get_by_running_id(integration_id: str, running_id: int) -> IntegrationGithubIssue: + +def get_by_running_id(integration_id: str, running_id: int) -> IntegrationModel: return integration_objects.get_by_running_id( - IntegrationGithubIssue, integration_id, running_id + IntegrationModel, integration_id, running_id ) -def get_all_by_integration_id(integration_id: str) -> List[IntegrationGithubIssue]: +def get_all_by_integration_id(integration_id: str) -> List[IntegrationModel]: return integration_objects.get_all_by_integration_id( - IntegrationGithubIssue, integration_id + IntegrationModel, integration_id ) -def get_all_by_project_id(project_id: str) -> List[IntegrationGithubIssue]: - return integration_objects.get_all_by_project_id(IntegrationGithubIssue, project_id) +def get_all_by_project_id(project_id: str) -> List[IntegrationModel]: + return integration_objects.get_all_by_project_id(IntegrationModel, project_id) def create( created_by: str, integration_id: str, running_id: int, - source: str, - url: str, - state: str, - number: str, - delta_criteria: str, - minio_file_name: str, - milestone: Optional[str] = None, - assignee: Optional[str] = None, created_at: Optional[datetime] = None, id: Optional[str] = None, with_commit: bool = True, -) -> IntegrationGithubIssue: + **metadata +) -> IntegrationModel: return integration_objects.create( - IntegrationGithubIssue, + IntegrationModel, created_by=created_by, integration_id=integration_id, running_id=running_id, - source=source, - url=url, - state=state, - number=number, - milestone=milestone, - assignee=assignee, - delta_criteria=delta_criteria, - minio_file_name=minio_file_name, created_at=created_at, id=id, with_commit=with_commit, + **metadata ) @@ -64,38 +52,24 @@ def update( id: str, updated_by: str, running_id: Optional[int] = None, - source: Optional[str] = None, - url: Optional[str] = None, - state: Optional[str] = None, - number: Optional[str] = None, - milestone: Optional[str] = None, - assignee: Optional[str] = None, - delta_criteria: Optional[str] = None, - minio_file_name: Optional[str] = None, updated_at: Optional[datetime] = None, with_commit: bool = True, -) -> IntegrationGithubIssue: + **metadata +) -> IntegrationModel: return integration_objects.update( - IntegrationGithubIssue, + IntegrationModel, id=id, updated_by=updated_by, running_id=running_id, - source=source, - url=url, - state=state, - number=number, - milestone=milestone, - assignee=assignee, - delta_criteria=delta_criteria, - minio_file_name=minio_file_name, updated_at=updated_at, with_commit=with_commit, + **metadata ) def clear_history(id: str, with_commit: bool = True) -> None: - integration_objects.clear_history(IntegrationGithubIssue, id, with_commit) + integration_objects.clear_history(IntegrationModel, id, with_commit) def delete_many(ids: List[str], with_commit: bool = True) -> None: - integration_objects.delete_many(IntegrationGithubIssue, ids, with_commit) + integration_objects.delete_many(IntegrationModel, ids, with_commit) diff --git a/integration_objects/pdf.py b/integration_objects/pdf.py index 3d82a859..469d5a87 100644 --- a/integration_objects/pdf.py +++ b/integration_objects/pdf.py @@ -4,55 +4,47 @@ from ..models import IntegrationPdf from .. import integration_objects +IntegrationModel = IntegrationPdf -def get_by_id(id: str) -> IntegrationPdf: - return integration_objects.get_by_id(IntegrationPdf, id) +def get_by_id(id: str) -> IntegrationModel: + return integration_objects.get_by_id(IntegrationModel, id) -def get_by_running_id(integration_id: str, running_id: int) -> IntegrationPdf: + +def get_by_running_id(integration_id: str, running_id: int) -> IntegrationModel: return integration_objects.get_by_running_id( - IntegrationPdf, integration_id, running_id + IntegrationModel, integration_id, running_id ) -def get_all_by_integration_id(integration_id: str) -> List[IntegrationPdf]: - return integration_objects.get_all_by_integration_id(IntegrationPdf, integration_id) +def get_all_by_integration_id(integration_id: str) -> List[IntegrationModel]: + return integration_objects.get_all_by_integration_id( + IntegrationModel, integration_id + ) -def get_all_by_project_id(project_id: str) -> List[IntegrationPdf]: - return integration_objects.get_all_by_project_id(IntegrationPdf, project_id) +def get_all_by_project_id(project_id: str) -> List[IntegrationModel]: + return integration_objects.get_all_by_project_id(IntegrationModel, project_id) def create( created_by: str, integration_id: str, running_id: int, - source: str, - file_path: str, - page: int, - total_pages: int, - title: str, - delta_criteria: str, - minio_file_name: str, created_at: Optional[datetime] = None, id: Optional[str] = None, with_commit: bool = True, -) -> IntegrationPdf: + **metadata +) -> IntegrationModel: return integration_objects.create( - IntegrationPdf, + IntegrationModel, created_by=created_by, integration_id=integration_id, running_id=running_id, - source=source, - file_path=file_path, - page=page, - total_pages=total_pages, - title=title, - delta_criteria=delta_criteria, - minio_file_name=minio_file_name, created_at=created_at, id=id, with_commit=with_commit, + **metadata ) @@ -60,36 +52,24 @@ def update( id: str, updated_by: str, running_id: Optional[int] = None, - source: Optional[str] = None, - file_path: Optional[str] = None, - page: Optional[int] = None, - total_pages: Optional[int] = None, - title: Optional[str] = None, - delta_criteria: Optional[str] = None, - minio_file_name: Optional[str] = None, updated_at: Optional[datetime] = None, with_commit: bool = True, -) -> IntegrationPdf: + **metadata +) -> IntegrationModel: return integration_objects.update( - IntegrationPdf, + IntegrationModel, id=id, updated_by=updated_by, running_id=running_id, - source=source, - file_path=file_path, - page=page, - total_pages=total_pages, - title=title, - delta_criteria=delta_criteria, - minio_file_name=minio_file_name, updated_at=updated_at, with_commit=with_commit, + **metadata ) def clear_history(id: str, with_commit: bool = True) -> None: - integration_objects.clear_history(IntegrationPdf, id, with_commit) + integration_objects.clear_history(IntegrationModel, id, with_commit) def delete_many(ids: List[str], with_commit: bool = True) -> None: - integration_objects.delete_many(IntegrationPdf, ids, with_commit) + integration_objects.delete_many(IntegrationModel, ids, with_commit) From 2ec00dba5c537267a748778f8744ef8caaa1022b Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 27 May 2025 14:00:52 +0200 Subject: [PATCH 032/114] perf: update integrations delta --- business_objects/record.py | 11 ----------- enums.py | 6 ++---- integration_objects/__init__.py | 24 ++++++++++++++++++------ integration_objects/github_file.py | 2 -- integration_objects/github_issue.py | 2 -- integration_objects/pdf.py | 2 -- models.py | 1 + 7 files changed, 21 insertions(+), 27 deletions(-) diff --git a/business_objects/record.py b/business_objects/record.py index 7fd02bdc..75d87bab 100644 --- a/business_objects/record.py +++ b/business_objects/record.py @@ -913,14 +913,3 @@ def get_first_no_text_column(project_id: str, record_id: str) -> str: WHERE r.project_id = '{project_id}' AND r.id = '{record_id}' """ return general.execute_first(query)[0] - - -def get_integration_delta_record(project_id: str, source: str) -> Record: - project_id = prevent_sql_injection(project_id, isinstance(project_id, str)) - source = prevent_sql_injection(source, isinstance(source, str)) - query = f""" - SELECT r.* - FROM record r - WHERE r.project_id = '{project_id}' AND r.data->>'source' = '{source}' - """ - return general.execute_first(query) diff --git a/enums.py b/enums.py index 96fd1dd3..0eae526f 100644 --- a/enums.py +++ b/enums.py @@ -941,16 +941,14 @@ class IntegrationMetadata(Enum): @staticmethod def from_string(value: str): - default = IntegrationMetadata.__DEFAULT__.value - try: metadata_keys = IntegrationMetadata[value].value except KeyError: raise ValueError( f"Could not parse IntegrationMetadata from string '{value}'" ) - return default.union(metadata_keys) + return IntegrationMetadata.__DEFAULT__.union(metadata_keys) @staticmethod def from_table_name(table_name: str): - raise IntegrationMetadata.from_string(table_name.upper()) + return IntegrationMetadata.from_string(table_name.upper()) diff --git a/integration_objects/__init__.py b/integration_objects/__init__.py index 47f1fe99..98e0a842 100644 --- a/integration_objects/__init__.py +++ b/integration_objects/__init__.py @@ -23,6 +23,17 @@ def get_by_running_id(IntegrationModel, integration_id: str, running_id: int) -> ) +def get_by_source(IntegrationModel, integration_id: str, source: str) -> object: + return ( + session.query(IntegrationModel) + .filter( + IntegrationModel.integration_id == integration_id, + IntegrationModel.source == source, + ) + .first() + ) + + def get_all_by_integration_id(IntegrationModel, integration_id: str) -> List[object]: return ( session.query(IntegrationModel) @@ -75,7 +86,6 @@ def update( updated_by: str, running_id: Optional[int] = None, updated_at: Optional[datetime] = None, - with_commit: bool = True, **metadata, ) -> object: integration_record = get_by_id(IntegrationModel, id) @@ -86,16 +96,19 @@ def update( if updated_at is not None: integration_record.updated_at = updated_at + record_updated = False kwargs = __get_supported_metadata(IntegrationModel.__tablename__, **metadata) for key, value in kwargs.items(): if not hasattr(integration_record, key): raise ValueError( f"Invalid field '{key}' for {IntegrationModel.__tablename__}" ) - if value is not None: + existing_value = getattr(integration_record, key, None) + if value is not None and value != existing_value: setattr(integration_record, key, value) + record_updated = True - general.add(integration_record, with_commit) + general.add(integration_record, with_commit=record_updated) return integration_record @@ -116,15 +129,14 @@ def clear_history(IntegrationModel, id: str, with_commit: bool = False) -> None: def __get_supported_metadata(table_name: str, **kwargs) -> None: supported_keys = IntegrationMetadata.from_table_name(table_name) - return { - key: kwargs[key] for key in supported_keys.value.intersection(kwargs.keys()) - } + return {key: kwargs[key] for key in supported_keys.intersection(kwargs.keys())} __all__ = [ "create", "update", "delete_many", + "clear_history", "get_by_id", "get_by_running_id", "get_all_by_integration_id", diff --git a/integration_objects/github_file.py b/integration_objects/github_file.py index 7233b78c..95697f1e 100644 --- a/integration_objects/github_file.py +++ b/integration_objects/github_file.py @@ -53,7 +53,6 @@ def update( updated_by: str, running_id: Optional[int] = None, updated_at: Optional[datetime] = None, - with_commit: bool = True, **metadata ) -> IntegrationModel: return integration_objects.update( @@ -62,7 +61,6 @@ def update( updated_by=updated_by, running_id=running_id, updated_at=updated_at, - with_commit=with_commit, **metadata ) diff --git a/integration_objects/github_issue.py b/integration_objects/github_issue.py index 7fe0e588..accadebb 100644 --- a/integration_objects/github_issue.py +++ b/integration_objects/github_issue.py @@ -53,7 +53,6 @@ def update( updated_by: str, running_id: Optional[int] = None, updated_at: Optional[datetime] = None, - with_commit: bool = True, **metadata ) -> IntegrationModel: return integration_objects.update( @@ -62,7 +61,6 @@ def update( updated_by=updated_by, running_id=running_id, updated_at=updated_at, - with_commit=with_commit, **metadata ) diff --git a/integration_objects/pdf.py b/integration_objects/pdf.py index 469d5a87..e6bfe2d3 100644 --- a/integration_objects/pdf.py +++ b/integration_objects/pdf.py @@ -53,7 +53,6 @@ def update( updated_by: str, running_id: Optional[int] = None, updated_at: Optional[datetime] = None, - with_commit: bool = True, **metadata ) -> IntegrationModel: return integration_objects.update( @@ -62,7 +61,6 @@ def update( updated_by=updated_by, running_id=running_id, updated_at=updated_at, - with_commit=with_commit, **metadata ) diff --git a/models.py b/models.py index eb091a17..d489205a 100644 --- a/models.py +++ b/models.py @@ -2166,6 +2166,7 @@ class IntegrationGithubFile(Base): source = Column(String, index=True) path = Column(String) sha = Column(String) + code_language = Column(String) delta_criteria = Column(JSON) minio_file_name = Column(String) From 614d70677cd8d771220ee679b3d2f6ec8e77d12d Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Wed, 28 May 2025 00:25:58 +0200 Subject: [PATCH 033/114] perf: add sharepoint integration --- enums.py | 4 ++- integration_objects/__init__.py | 54 ++++++++++++++++++++++++--------- models.py | 33 ++++++++++++++++++++ 3 files changed, 75 insertions(+), 16 deletions(-) diff --git a/enums.py b/enums.py index 0eae526f..296b889b 100644 --- a/enums.py +++ b/enums.py @@ -162,6 +162,7 @@ class Tablenames(Enum): INTEGRATION_GITHUB_FILE = "github_file" INTEGRATION_GITHUB_ISSUE = "github_issue" INTEGRATION_PDF = "pdf" + INTEGRATION_SHAREPOINT = "sharepoint" def snake_case_to_pascal_case(self): # the type name (written in PascalCase) of a table is needed to create backrefs @@ -891,7 +892,7 @@ class CognitionIntegrationType(Enum): # DOCX = "DOCX" # XLSX = "XLSX" # WEBPAGE = "WEBPAGE" - SQL = "SQL" + SHAREPOINT = "SHAREPOINT" GITHUB_FILE = "GITHUB_FILE" GITHUB_ISSUE = "GITHUB_ISSUE" PDF = "PDF" @@ -938,6 +939,7 @@ class IntegrationMetadata(Enum): GITHUB_FILE = {"path", "sha", "code_language"} GITHUB_ISSUE = {"url", "state", "number", "assignee", "milestone"} PDF = {"file_path", "page", "total_pages", "title"} + SHAREPOINT = {} @staticmethod def from_string(value: str): diff --git a/integration_objects/__init__.py b/integration_objects/__init__.py index 98e0a842..b1f8f193 100644 --- a/integration_objects/__init__.py +++ b/integration_objects/__init__.py @@ -1,5 +1,6 @@ -from typing import List, Optional +from typing import List, Optional, Dict +from sqlalchemy import func from datetime import datetime from ..business_objects import general @@ -8,6 +9,17 @@ from ..enums import IntegrationMetadata +def get(IntegrationModel, id: str, integration_id: str) -> object: + return ( + session.query(IntegrationModel) + .filter( + IntegrationModel.id == id, + IntegrationModel.integration_id == integration_id, + ) + .first() + ) + + def get_by_id(IntegrationModel, id: str) -> object: return session.query(IntegrationModel).filter(IntegrationModel.id == id).first() @@ -55,6 +67,28 @@ def get_all_by_project_id(IntegrationModel, project_id: str) -> List[object]: ) +def get_existing_integration_records( + IntegrationModel, integration_id: str +) -> Dict[str, object]: + return { + integration.source: integration + for integration in get_all_by_integration_id(IntegrationModel, integration_id) + } + + +def get_max_running_id(IntegrationModel, integration_id: str) -> int: + """ + Get the maximum running_id for a given integration_id. + Returns 0 if no records are found. + """ + max_running_id = ( + session.query(func.coalesce(func.max(IntegrationModel.running_id), 0)) + .filter(IntegrationModel.integration_id == integration_id) + .first() + ) + return max_running_id[0] + + def create( IntegrationModel, created_by: str, @@ -83,12 +117,13 @@ def create( def update( IntegrationModel, id: str, + integration_id: str, updated_by: str, running_id: Optional[int] = None, updated_at: Optional[datetime] = None, **metadata, ) -> object: - integration_record = get_by_id(IntegrationModel, id) + integration_record = get(IntegrationModel, id, integration_id) integration_record.updated_by = updated_by if running_id is not None: @@ -108,7 +143,8 @@ def update( setattr(integration_record, key, value) record_updated = True - general.add(integration_record, with_commit=record_updated) + if record_updated: + general.add(integration_record, with_commit=True) return integration_record @@ -130,15 +166,3 @@ def clear_history(IntegrationModel, id: str, with_commit: bool = False) -> None: def __get_supported_metadata(table_name: str, **kwargs) -> None: supported_keys = IntegrationMetadata.from_table_name(table_name) return {key: kwargs[key] for key in supported_keys.intersection(kwargs.keys())} - - -__all__ = [ - "create", - "update", - "delete_many", - "clear_history", - "get_by_id", - "get_by_running_id", - "get_all_by_integration_id", - "get_all_by_project_id", -] diff --git a/models.py b/models.py index d489205a..ec32b25b 100644 --- a/models.py +++ b/models.py @@ -2237,3 +2237,36 @@ class IntegrationPdf(Base): delta_criteria = Column(JSON) minio_file_name = Column(String) + + +class IntegrationSharepoint(Base): + __tablename__ = Tablenames.INTEGRATION_SHAREPOINT.value + __table_args__ = {"schema": "integration"} + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + created_by = Column( + UUID(as_uuid=True), + ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET NULL"), + index=False, + ) + updated_by = Column( + UUID(as_uuid=True), + ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET NULL"), + index=False, + nullable=True, + ) + created_at = Column(DateTime, default=sql.func.now()) + updated_at = Column(DateTime, default=None, onupdate=sql.func.now()) + integration_id = Column( + UUID(as_uuid=True), + ForeignKey(f"cognition.{Tablenames.INTEGRATION.value}.id", ondelete="CASCADE"), + index=True, + ) + running_id = Column(Integer, index=True) + source = Column(String, index=True) + # file_path = Column(String) + # page = Column(Integer) + # total_pages = Column(Integer) + # title = Column(String) + + delta_criteria = Column(JSON) + minio_file_name = Column(String) From d7c8d6b31bd6702285f7c64b63c364901d89ca1a Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 29 May 2025 02:05:18 +0200 Subject: [PATCH 034/114] perf: update integration objects --- integration_objects/__init__.py | 25 ++++++++-- integration_objects/github_file.py | 73 ----------------------------- integration_objects/github_issue.py | 73 ----------------------------- integration_objects/pdf.py | 73 ----------------------------- 4 files changed, 20 insertions(+), 224 deletions(-) delete mode 100644 integration_objects/github_file.py delete mode 100644 integration_objects/github_issue.py delete mode 100644 integration_objects/pdf.py diff --git a/integration_objects/__init__.py b/integration_objects/__init__.py index b1f8f193..f3313e05 100644 --- a/integration_objects/__init__.py +++ b/integration_objects/__init__.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Dict +from typing import List, Optional, Dict, Union from sqlalchemy import func from datetime import datetime @@ -99,7 +99,7 @@ def create( with_commit: bool = True, **metadata, ) -> object: - kwargs = __get_supported_metadata(IntegrationModel.__tablename__, **metadata) + kwargs = __get_supported_metadata(IntegrationModel.__tablename__, metadata) integration_record = IntegrationModel( created_by=created_by, integration_id=integration_id, @@ -132,7 +132,7 @@ def update( integration_record.updated_at = updated_at record_updated = False - kwargs = __get_supported_metadata(IntegrationModel.__tablename__, **metadata) + kwargs = __get_supported_metadata(IntegrationModel.__tablename__, metadata) for key, value in kwargs.items(): if not hasattr(integration_record, key): raise ValueError( @@ -163,6 +163,21 @@ def clear_history(IntegrationModel, id: str, with_commit: bool = False) -> None: general.add(integration_record, with_commit) -def __get_supported_metadata(table_name: str, **kwargs) -> None: +def __get_supported_metadata( + table_name: str, metadata: Dict[str, Union[str, int, float, bool]] +) -> None: supported_keys = IntegrationMetadata.from_table_name(table_name) - return {key: kwargs[key] for key in supported_keys.intersection(kwargs.keys())} + return {key: metadata[key] for key in supported_keys.intersection(metadata.keys())} + + +def __rename_metadata( + table_name: str, metadata: Dict[str, Union[str, int, float, bool]] +) -> Dict[str, object]: + rename_keys = { + "id": f"{table_name}_id", + "created_by": f"{table_name}_created_by", + "created_at": f"{table_name}_created_at", + "updated_by": f"{table_name}_updated_by", + "updated_at": f"{table_name}_updated_at", + } + return {rename_keys.get(key, key): value for key, value in metadata.items()} diff --git a/integration_objects/github_file.py b/integration_objects/github_file.py deleted file mode 100644 index 95697f1e..00000000 --- a/integration_objects/github_file.py +++ /dev/null @@ -1,73 +0,0 @@ -from typing import List, Optional -from datetime import datetime - -from ..models import IntegrationGithubFile -from .. import integration_objects - -IntegrationModel = IntegrationGithubFile - - -def get_by_id(id: str) -> IntegrationModel: - return integration_objects.get_by_id(IntegrationModel, id) - - -def get_by_running_id(integration_id: str, running_id: int) -> IntegrationModel: - return integration_objects.get_by_running_id( - IntegrationModel, integration_id, running_id - ) - - -def get_all_by_integration_id(integration_id: str) -> List[IntegrationModel]: - return integration_objects.get_all_by_integration_id( - IntegrationModel, integration_id - ) - - -def get_all_by_project_id(project_id: str) -> List[IntegrationModel]: - return integration_objects.get_all_by_project_id(IntegrationModel, project_id) - - -def create( - created_by: str, - integration_id: str, - running_id: int, - created_at: Optional[datetime] = None, - id: Optional[str] = None, - with_commit: bool = True, - **metadata -) -> IntegrationModel: - return integration_objects.create( - IntegrationModel, - created_by=created_by, - integration_id=integration_id, - running_id=running_id, - created_at=created_at, - id=id, - with_commit=with_commit, - **metadata - ) - - -def update( - id: str, - updated_by: str, - running_id: Optional[int] = None, - updated_at: Optional[datetime] = None, - **metadata -) -> IntegrationModel: - return integration_objects.update( - IntegrationModel, - id=id, - updated_by=updated_by, - running_id=running_id, - updated_at=updated_at, - **metadata - ) - - -def clear_history(id: str, with_commit: bool = True) -> None: - integration_objects.clear_history(IntegrationModel, id, with_commit) - - -def delete_many(ids: List[str], with_commit: bool = True) -> None: - integration_objects.delete_many(IntegrationModel, ids, with_commit) diff --git a/integration_objects/github_issue.py b/integration_objects/github_issue.py deleted file mode 100644 index accadebb..00000000 --- a/integration_objects/github_issue.py +++ /dev/null @@ -1,73 +0,0 @@ -from typing import List, Optional -from datetime import datetime - -from ..models import IntegrationGithubIssue -from .. import integration_objects - -IntegrationModel = IntegrationGithubIssue - - -def get_by_id(id: str) -> IntegrationModel: - return integration_objects.get_by_id(IntegrationModel, id) - - -def get_by_running_id(integration_id: str, running_id: int) -> IntegrationModel: - return integration_objects.get_by_running_id( - IntegrationModel, integration_id, running_id - ) - - -def get_all_by_integration_id(integration_id: str) -> List[IntegrationModel]: - return integration_objects.get_all_by_integration_id( - IntegrationModel, integration_id - ) - - -def get_all_by_project_id(project_id: str) -> List[IntegrationModel]: - return integration_objects.get_all_by_project_id(IntegrationModel, project_id) - - -def create( - created_by: str, - integration_id: str, - running_id: int, - created_at: Optional[datetime] = None, - id: Optional[str] = None, - with_commit: bool = True, - **metadata -) -> IntegrationModel: - return integration_objects.create( - IntegrationModel, - created_by=created_by, - integration_id=integration_id, - running_id=running_id, - created_at=created_at, - id=id, - with_commit=with_commit, - **metadata - ) - - -def update( - id: str, - updated_by: str, - running_id: Optional[int] = None, - updated_at: Optional[datetime] = None, - **metadata -) -> IntegrationModel: - return integration_objects.update( - IntegrationModel, - id=id, - updated_by=updated_by, - running_id=running_id, - updated_at=updated_at, - **metadata - ) - - -def clear_history(id: str, with_commit: bool = True) -> None: - integration_objects.clear_history(IntegrationModel, id, with_commit) - - -def delete_many(ids: List[str], with_commit: bool = True) -> None: - integration_objects.delete_many(IntegrationModel, ids, with_commit) diff --git a/integration_objects/pdf.py b/integration_objects/pdf.py deleted file mode 100644 index e6bfe2d3..00000000 --- a/integration_objects/pdf.py +++ /dev/null @@ -1,73 +0,0 @@ -from typing import List, Optional -from datetime import datetime - -from ..models import IntegrationPdf -from .. import integration_objects - -IntegrationModel = IntegrationPdf - - -def get_by_id(id: str) -> IntegrationModel: - return integration_objects.get_by_id(IntegrationModel, id) - - -def get_by_running_id(integration_id: str, running_id: int) -> IntegrationModel: - return integration_objects.get_by_running_id( - IntegrationModel, integration_id, running_id - ) - - -def get_all_by_integration_id(integration_id: str) -> List[IntegrationModel]: - return integration_objects.get_all_by_integration_id( - IntegrationModel, integration_id - ) - - -def get_all_by_project_id(project_id: str) -> List[IntegrationModel]: - return integration_objects.get_all_by_project_id(IntegrationModel, project_id) - - -def create( - created_by: str, - integration_id: str, - running_id: int, - created_at: Optional[datetime] = None, - id: Optional[str] = None, - with_commit: bool = True, - **metadata -) -> IntegrationModel: - return integration_objects.create( - IntegrationModel, - created_by=created_by, - integration_id=integration_id, - running_id=running_id, - created_at=created_at, - id=id, - with_commit=with_commit, - **metadata - ) - - -def update( - id: str, - updated_by: str, - running_id: Optional[int] = None, - updated_at: Optional[datetime] = None, - **metadata -) -> IntegrationModel: - return integration_objects.update( - IntegrationModel, - id=id, - updated_by=updated_by, - running_id=running_id, - updated_at=updated_at, - **metadata - ) - - -def clear_history(id: str, with_commit: bool = True) -> None: - integration_objects.clear_history(IntegrationModel, id, with_commit) - - -def delete_many(ids: List[str], with_commit: bool = True) -> None: - integration_objects.delete_many(IntegrationModel, ids, with_commit) From 359187eb28d57bee2ffd37cc59cd4dcaaf16a767 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 29 May 2025 16:08:23 +0200 Subject: [PATCH 035/114] perf: expand IntegrationSharepoint --- enums.py | 17 ++++++++++++++++- integration_objects/__init__.py | 2 +- models.py | 19 +++++++++++++++---- 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/enums.py b/enums.py index 296b889b..69bc3d6e 100644 --- a/enums.py +++ b/enums.py @@ -939,7 +939,22 @@ class IntegrationMetadata(Enum): GITHUB_FILE = {"path", "sha", "code_language"} GITHUB_ISSUE = {"url", "state", "number", "assignee", "milestone"} PDF = {"file_path", "page", "total_pages", "title"} - SHAREPOINT = {} + SHAREPOINT = { + "extension", + "object_id", + "parent_path", + "name", + "web_url", + f"{Tablenames.INTEGRATION_SHAREPOINT.value}_created_by", + "modified_by", + "created", + "modified", + "description", + "size", + "mime_type", + "hashes", + "permissions", + } @staticmethod def from_string(value: str): diff --git a/integration_objects/__init__.py b/integration_objects/__init__.py index f3313e05..eaf81622 100644 --- a/integration_objects/__init__.py +++ b/integration_objects/__init__.py @@ -76,7 +76,7 @@ def get_existing_integration_records( } -def get_max_running_id(IntegrationModel, integration_id: str) -> int: +def get_running_id(IntegrationModel, integration_id: str) -> int: """ Get the maximum running_id for a given integration_id. Returns 0 if no records are found. diff --git a/models.py b/models.py index ec32b25b..6738316d 100644 --- a/models.py +++ b/models.py @@ -2263,10 +2263,21 @@ class IntegrationSharepoint(Base): ) running_id = Column(Integer, index=True) source = Column(String, index=True) - # file_path = Column(String) - # page = Column(Integer) - # total_pages = Column(Integer) - # title = Column(String) + + extension = Column(String) + object_id = Column(String) + parent_path = Column(String) + name = Column(String) + web_url = Column(String) + sharepoint_created_by = Column(String) + modified_by = Column(String) + created = Column(String) + modified = Column(String) + description = Column(String) + size = Column(String) + mime_type = Column(String) + hashes = Column(JSON) + permissions = Column(JSON) delta_criteria = Column(JSON) minio_file_name = Column(String) From 3f774d16a49210dd652bbb0c4d7375be790c4a08 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 29 May 2025 19:45:28 +0200 Subject: [PATCH 036/114] fix: integration.started_at perf: add unique constraints to integrations --- cognition_objects/integration.py | 12 ++++++---- models.py | 41 ++++++++++++++++++++++++++++---- 2 files changed, 44 insertions(+), 9 deletions(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index c471034a..d1362c16 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -1,5 +1,5 @@ from typing import List, Optional, Dict -from datetime import datetime +import datetime from fastapi import HTTPException from sqlalchemy import func @@ -65,8 +65,9 @@ def create( integration_config: Dict, llm_config: Dict, with_commit: bool = True, - created_at: Optional[datetime] = None, - finished_at: Optional[datetime] = None, + started_at: Optional[datetime.datetime] = None, + created_at: Optional[datetime.datetime] = None, + finished_at: Optional[datetime.datetime] = None, id: Optional[str] = None, project_id: Optional[str] = None, ) -> CognitionIntegration: @@ -78,6 +79,7 @@ def create( project_id=project_id, created_by=user_id, created_at=created_at, + started_at=started_at, finished_at=finished_at, name=name, description=description, @@ -101,8 +103,8 @@ def update( integration_config: Optional[int] = None, llm_config: Optional[Dict] = None, error_message: Optional[str] = None, - started_at: Optional[datetime] = None, - finished_at: Optional[datetime] = None, + started_at: Optional[datetime.datetime] = None, + finished_at: Optional[datetime.datetime] = None, with_commit: bool = True, ) -> CognitionIntegration: integration: CognitionIntegration = get_by_id(id) diff --git a/models.py b/models.py index 6738316d..09d10e4e 100644 --- a/models.py +++ b/models.py @@ -2142,7 +2142,15 @@ class CognitionIntegrationAccess(Base): class IntegrationGithubFile(Base): __tablename__ = Tablenames.INTEGRATION_GITHUB_FILE.value - __table_args__ = {"schema": "integration"} + __table_args__ = ( + UniqueConstraint( + "integration_id", + "running_id", + "source", + name="unique_source", + ), + {"schema": "integration"}, + ) id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) created_by = Column( UUID(as_uuid=True), @@ -2174,7 +2182,16 @@ class IntegrationGithubFile(Base): class IntegrationGithubIssue(Base): __tablename__ = Tablenames.INTEGRATION_GITHUB_ISSUE.value - __table_args__ = {"schema": "integration"} + __table_args__ = ( + UniqueConstraint( + "integration_id", + "running_id", + "source", + name="unique_source", + ), + {"schema": "integration"}, + ) + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) created_by = Column( UUID(as_uuid=True), @@ -2208,7 +2225,15 @@ class IntegrationGithubIssue(Base): class IntegrationPdf(Base): __tablename__ = Tablenames.INTEGRATION_PDF.value - __table_args__ = {"schema": "integration"} + __table_args__ = ( + UniqueConstraint( + "integration_id", + "running_id", + "source", + name="unique_source", + ), + {"schema": "integration"}, + ) id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) created_by = Column( UUID(as_uuid=True), @@ -2241,7 +2266,15 @@ class IntegrationPdf(Base): class IntegrationSharepoint(Base): __tablename__ = Tablenames.INTEGRATION_SHAREPOINT.value - __table_args__ = {"schema": "integration"} + __table_args__ = ( + UniqueConstraint( + "integration_id", + "running_id", + "source", + name="unique_source", + ), + {"schema": "integration"}, + ) id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) created_by = Column( UUID(as_uuid=True), From 03169de2cd6b042136dc208ecd46cb3ee38922a7 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 29 May 2025 21:01:39 +0200 Subject: [PATCH 037/114] perf: integration data types --- models.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/models.py b/models.py index 09d10e4e..a148eb48 100644 --- a/models.py +++ b/models.py @@ -2213,15 +2213,14 @@ class IntegrationGithubIssue(Base): ) running_id = Column(Integer, index=True) source = Column(String, index=True) + minio_file_name = Column(String) + url = Column(String) state = Column(String) assignee = Column(String) milestone = Column(String) number = Column(Integer) - delta_criteria = Column(JSON) - minio_file_name = Column(String) - class IntegrationPdf(Base): __tablename__ = Tablenames.INTEGRATION_PDF.value @@ -2255,14 +2254,13 @@ class IntegrationPdf(Base): ) running_id = Column(Integer, index=True) source = Column(String, index=True) + minio_file_name = Column(String) + file_path = Column(String) page = Column(Integer) total_pages = Column(Integer) title = Column(String) - delta_criteria = Column(JSON) - minio_file_name = Column(String) - class IntegrationSharepoint(Base): __tablename__ = Tablenames.INTEGRATION_SHAREPOINT.value @@ -2296,6 +2294,7 @@ class IntegrationSharepoint(Base): ) running_id = Column(Integer, index=True) source = Column(String, index=True) + minio_file_name = Column(String) extension = Column(String) object_id = Column(String) @@ -2304,13 +2303,10 @@ class IntegrationSharepoint(Base): web_url = Column(String) sharepoint_created_by = Column(String) modified_by = Column(String) - created = Column(String) - modified = Column(String) + created = Column(DateTime, default=None) + modified = Column(DateTime, default=None) description = Column(String) - size = Column(String) + size = Column(Integer) mime_type = Column(String) hashes = Column(JSON) permissions = Column(JSON) - - delta_criteria = Column(JSON) - minio_file_name = Column(String) From 18990bbd7147942c558eac0ee0e448aeef646d6a Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 29 May 2025 21:12:28 +0200 Subject: [PATCH 038/114] perf: unique constraint names --- models.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/models.py b/models.py index a148eb48..511eea55 100644 --- a/models.py +++ b/models.py @@ -2147,7 +2147,7 @@ class IntegrationGithubFile(Base): "integration_id", "running_id", "source", - name="unique_source", + name=f"unique_{__tablename__}_source", ), {"schema": "integration"}, ) @@ -2187,7 +2187,7 @@ class IntegrationGithubIssue(Base): "integration_id", "running_id", "source", - name="unique_source", + name=f"unique_{__tablename__}_source", ), {"schema": "integration"}, ) @@ -2229,7 +2229,7 @@ class IntegrationPdf(Base): "integration_id", "running_id", "source", - name="unique_source", + name=f"unique_{__tablename__}_source", ), {"schema": "integration"}, ) @@ -2269,7 +2269,7 @@ class IntegrationSharepoint(Base): "integration_id", "running_id", "source", - name="unique_source", + name=f"unique_{__tablename__}_source", ), {"schema": "integration"}, ) From 25b039a008634941c556eca61a17e03c6ee83c01 Mon Sep 17 00:00:00 2001 From: Lina Date: Mon, 2 Jun 2025 16:03:54 +0200 Subject: [PATCH 039/114] Reset finished at for new integrations --- cognition_objects/integration.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index d1362c16..ec43b459 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -125,8 +125,8 @@ def update( integration.error_message = error_message if started_at is not None: integration.started_at = started_at - if finished_at is not None: - integration.finished_at = finished_at + + integration.finished_at = finished_at general.add(integration, with_commit) return integration From aa4416e962b310cf3d5f6aefccc1bcd4130b1296 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 3 Jun 2025 10:32:00 +0200 Subject: [PATCH 040/114] perf: update integration objects --- integration_objects/__init__.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/integration_objects/__init__.py b/integration_objects/__init__.py index eaf81622..8fe0294a 100644 --- a/integration_objects/__init__.py +++ b/integration_objects/__init__.py @@ -76,17 +76,16 @@ def get_existing_integration_records( } -def get_running_id(IntegrationModel, integration_id: str) -> int: - """ - Get the maximum running_id for a given integration_id. - Returns 0 if no records are found. - """ - max_running_id = ( - session.query(func.coalesce(func.max(IntegrationModel.running_id), 0)) +def get_running_ids(IntegrationModel, integration_id: str) -> int: + return dict( + session.query( + IntegrationModel.source, + func.coalesce(func.max(IntegrationModel.running_id), 0), + ) .filter(IntegrationModel.integration_id == integration_id) - .first() + .group_by(IntegrationModel.source) + .all() ) - return max_running_id[0] def create( From 40cbf2f0ddaaef307a35c6d3077af55f4b64c48c Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 3 Jun 2025 10:52:06 +0200 Subject: [PATCH 041/114] perf: add integration delta deletion --- integration_objects/__init__.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/integration_objects/__init__.py b/integration_objects/__init__.py index 8fe0294a..ca433c3c 100644 --- a/integration_objects/__init__.py +++ b/integration_objects/__init__.py @@ -3,7 +3,7 @@ from sqlalchemy import func from datetime import datetime -from ..business_objects import general +from ..business_objects import general, record as record_db_bo from ..cognition_objects import integration as integration_db_bo from ..session import session from ..enums import IntegrationMetadata @@ -148,10 +148,24 @@ def update( return integration_record -def delete_many(IntegrationModel, ids: List[str], with_commit: bool = False) -> None: +def delete_many( + IntegrationModel, + ids: List[str], + project_id: Optional[str] = None, + with_commit: bool = False, +) -> None: integration_records = session.query(IntegrationModel).filter( IntegrationModel.id.in_(ids) ) + if project_id: + delete_running_ids = [ + integration_record.running_id for integration_record in integration_records + ] + refinery_record_ids = [ + record.id + for record in record_db_bo.get_all(project_id=project_id) + if record.data["running_id"] in delete_running_ids + ] integration_records.delete(synchronize_session=False) general.flush_or_commit(with_commit) From 515e01ab4c99630b05b84f5ffa4a99c257e31280 Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Tue, 3 Jun 2025 14:15:43 +0200 Subject: [PATCH 042/114] basic models --- cognition_objects/group.py | 68 ++++++++++++++++++++++++++++ cognition_objects/group_member.py | 73 +++++++++++++++++++++++++++++++ enums.py | 4 +- models.py | 36 +++++++++++++++ 4 files changed, 180 insertions(+), 1 deletion(-) create mode 100644 cognition_objects/group.py create mode 100644 cognition_objects/group_member.py diff --git a/cognition_objects/group.py b/cognition_objects/group.py new file mode 100644 index 00000000..f2e14b7c --- /dev/null +++ b/cognition_objects/group.py @@ -0,0 +1,68 @@ +from datetime import datetime +from typing import List, Optional +from ..business_objects import general +from ..session import session +from ..models import CognitionGroup + + +def get(group_id: str) -> CognitionGroup: + return session.query(CognitionGroup).filter(CognitionGroup.id == group_id).first() + + +def get_with_organization_id(organization_id: str, group_id: str) -> CognitionGroup: + return ( + session.query(CognitionGroup) + .filter(CognitionGroup.organization_id == organization_id, CognitionGroup.id == group_id) + .first() + ) + + +def get_all(organization_id: str) -> List[CognitionGroup]: + return ( + session.query(CognitionGroup) + .filter(CognitionGroup.organization_id == organization_id) + .order_by(CognitionGroup.name.asc()) + .all() + ) + + +def create_group( + organization_id: str, + name: str, + description: str, + created_by: str, + created_at: Optional[datetime] = None, + with_commit: bool = False, +) -> CognitionGroup: + group = CognitionGroup( + organization_id=organization_id, + name=name, + description=description, + created_by=created_by, + created_at=created_at, + ) + general.add(group, with_commit) + return group + + +def update_group( + group_id: str, + name: Optional[str] = None, + description: Optional[str] = None, + with_commit: bool = False, +) -> CognitionGroup: + group = get(group_id) + + if name is not None: + group.name = name + if description is not None: + group.description = description + general.flush_or_commit(with_commit) + + return group + + +def delete(organization_id: str, group_id: str, with_commit: bool = False) -> None: + group = get_with_organization_id(organization_id, group_id) + if group: + general.delete(group, with_commit) diff --git a/cognition_objects/group_member.py b/cognition_objects/group_member.py new file mode 100644 index 00000000..f221c1f2 --- /dev/null +++ b/cognition_objects/group_member.py @@ -0,0 +1,73 @@ +from datetime import datetime +from typing import Optional +from ..business_objects import general, user +from . import group +from ..session import session +from ..models import CognitionGroupMember + + +def get(group_id: str, id: str): + return ( + session.query(CognitionGroupMember) + .filter(CognitionGroupMember.group_id == group_id, CognitionGroupMember.id == id) + .first() + ) + + +def get_by_group_and_user(group_id: str, user_id: str) -> CognitionGroupMember: + return ( + session.query(CognitionGroupMember) + .filter(CognitionGroupMember.group_id == group_id, CognitionGroupMember.user_id == user_id) + .first() + ) + + +def get_all_by_group(group_id: str) -> list: + return session.query(CognitionGroupMember).filter(CognitionGroupMember.group_id == group_id).all() + + +def get_all_by_group_count(group_id: str) -> int: + return session.query(CognitionGroupMember).filter(CognitionGroupMember.group_id == group_id).count() + + +def create( + group_id: str, + user_id: str, + created_at: Optional[datetime] = None, + with_commit: bool = False, +) -> CognitionGroupMember: + already_exist = get_by_group_and_user(group_id=group_id, user_id=user_id) + if already_exist: + return already_exist + + group_item = group.get(group_id) + user_item = user.get(user_id) + if not group_item or not user_item: + raise Exception("Group or user not found") + if group_item.organization_id != user_item.organization_id: + raise Exception("User not in the same organization as the group") + + group_member = CognitionGroupMember( + group_id=group_id, + user_id=user_id, + created_at=created_at, + ) + general.add(group_member, with_commit) + return group_member + + +def delete_by_group_and_user_id( + group_id: str, user_id: str, with_commit: bool = False +) -> None: + group_member = get_by_group_and_user(group_id, user_id) + if group_member: + general.delete(group_member, with_commit) + + +def delete_by_user_id(user_id: str, with_commit: bool = False) -> None: + group_memberships = ( + session.query(CognitionGroupMember).filter(CognitionGroupMember.user_id == user_id).all() + ) + for membership in group_memberships: + general.delete(membership, with_commit=False) + general.flush_or_commit(with_commit) diff --git a/enums.py b/enums.py index 0a920a81..0d3895b9 100644 --- a/enums.py +++ b/enums.py @@ -155,7 +155,9 @@ class Tablenames(Enum): EVALUATION_RUN = "evaluation_run" PLAYGROUND_QUESTION = "playground_question" FULL_ADMIN_ACCESS = "full_admin_access" - + GROUP = "group" # used for group based access control + GROUP_MEMBER = "group_member" # used for group based access control + def snake_case_to_pascal_case(self): # the type name (written in PascalCase) of a table is needed to create backrefs return "".join([word.title() for word in self.value.split("_")]) diff --git a/models.py b/models.py index 7da43556..e80264e3 100644 --- a/models.py +++ b/models.py @@ -1924,6 +1924,42 @@ class GraphRAGIndex(Base): root_dir = Column(String) +class CognitionGroup(Base): + __tablename__ = Tablenames.GROUP.value + __table_args__ = {"schema": "cognition"} + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + organization_id = Column( + UUID(as_uuid=True), + ForeignKey(f"{Tablenames.ORGANIZATION.value}.id", ondelete="CASCADE"), + index=True, + ) + name = Column(String, unique=True) + description = Column(String) + created_at = Column(DateTime, default=sql.func.now()) + created_by = Column( + UUID(as_uuid=True), + ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET NULL"), + index=True, + ) + + +class CognitionGroupMember(Base): + __tablename__ = Tablenames.GROUP_MEMBER.value + __table_args__ = {"schema": "cognition"} + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + group_id = Column( + UUID(as_uuid=True), + ForeignKey(f"{Tablenames.GROUP_MEMBER.value}.id", ondelete="CASCADE"), + index=True, + ) + user_id = Column( + UUID(as_uuid=True), + ForeignKey(f"{Tablenames.USER.value}.id", ondelete="CASCADE"), + index=True, + ) + created_at = Column(DateTime, default=sql.func.now()) + + # =========================== Global tables =========================== class GlobalWebsocketAccess(Base): # table to store prepared websocket configuration. From 6c6f4de0915cee735506a3ae0e29a4bb44d8c827 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 3 Jun 2025 14:26:31 +0200 Subject: [PATCH 043/114] perf: last_synced_at integration column --- integration_objects/__init__.py | 12 +----------- models.py | 1 + 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/integration_objects/__init__.py b/integration_objects/__init__.py index ca433c3c..b588f8ff 100644 --- a/integration_objects/__init__.py +++ b/integration_objects/__init__.py @@ -3,7 +3,7 @@ from sqlalchemy import func from datetime import datetime -from ..business_objects import general, record as record_db_bo +from ..business_objects import general from ..cognition_objects import integration as integration_db_bo from ..session import session from ..enums import IntegrationMetadata @@ -151,21 +151,11 @@ def update( def delete_many( IntegrationModel, ids: List[str], - project_id: Optional[str] = None, with_commit: bool = False, ) -> None: integration_records = session.query(IntegrationModel).filter( IntegrationModel.id.in_(ids) ) - if project_id: - delete_running_ids = [ - integration_record.running_id for integration_record in integration_records - ] - refinery_record_ids = [ - record.id - for record in record_db_bo.get_all(project_id=project_id) - if record.data["running_id"] in delete_running_ids - ] integration_records.delete(synchronize_session=False) general.flush_or_commit(with_commit) diff --git a/models.py b/models.py index 511eea55..073bd948 100644 --- a/models.py +++ b/models.py @@ -2118,6 +2118,7 @@ class CognitionIntegration(Base): llm_config = Column(JSON) error_message = Column(String) + last_synced_at = Column(DateTime) class CognitionIntegrationAccess(Base): From c18a6eba094c90db9586a46b9a466231d6382d56 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 3 Jun 2025 14:27:55 +0200 Subject: [PATCH 044/114] perf: add is_synced column --- models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/models.py b/models.py index 073bd948..29b4dea8 100644 --- a/models.py +++ b/models.py @@ -2118,6 +2118,7 @@ class CognitionIntegration(Base): llm_config = Column(JSON) error_message = Column(String) + is_synced = Column(Boolean, default=False) last_synced_at = Column(DateTime) From d5afe9bb661f71fb38667641f979c7c2a428af40 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 3 Jun 2025 15:23:09 +0200 Subject: [PATCH 045/114] chore: add typing --- integration_objects/__init__.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/integration_objects/__init__.py b/integration_objects/__init__.py index b588f8ff..c09f1c86 100644 --- a/integration_objects/__init__.py +++ b/integration_objects/__init__.py @@ -9,7 +9,7 @@ from ..enums import IntegrationMetadata -def get(IntegrationModel, id: str, integration_id: str) -> object: +def get(IntegrationModel: type, id: str, integration_id: str) -> object: return ( session.query(IntegrationModel) .filter( @@ -20,11 +20,13 @@ def get(IntegrationModel, id: str, integration_id: str) -> object: ) -def get_by_id(IntegrationModel, id: str) -> object: +def get_by_id(IntegrationModel: type, id: str) -> object: return session.query(IntegrationModel).filter(IntegrationModel.id == id).first() -def get_by_running_id(IntegrationModel, integration_id: str, running_id: int) -> object: +def get_by_running_id( + IntegrationModel: type, integration_id: str, running_id: int +) -> object: return ( session.query(IntegrationModel) .filter( @@ -35,7 +37,7 @@ def get_by_running_id(IntegrationModel, integration_id: str, running_id: int) -> ) -def get_by_source(IntegrationModel, integration_id: str, source: str) -> object: +def get_by_source(IntegrationModel: type, integration_id: str, source: str) -> object: return ( session.query(IntegrationModel) .filter( @@ -46,7 +48,9 @@ def get_by_source(IntegrationModel, integration_id: str, source: str) -> object: ) -def get_all_by_integration_id(IntegrationModel, integration_id: str) -> List[object]: +def get_all_by_integration_id( + IntegrationModel: type, integration_id: str +) -> List[object]: return ( session.query(IntegrationModel) .filter(IntegrationModel.integration_id == integration_id) @@ -55,7 +59,7 @@ def get_all_by_integration_id(IntegrationModel, integration_id: str) -> List[obj ) -def get_all_by_project_id(IntegrationModel, project_id: str) -> List[object]: +def get_all_by_project_id(IntegrationModel: type, project_id: str) -> List[object]: integrations = integration_db_bo.get_all_by_project_id(project_id) return ( session.query(IntegrationModel) @@ -76,7 +80,7 @@ def get_existing_integration_records( } -def get_running_ids(IntegrationModel, integration_id: str) -> int: +def get_running_ids(IntegrationModel: type, integration_id: str) -> int: return dict( session.query( IntegrationModel.source, @@ -89,7 +93,7 @@ def get_running_ids(IntegrationModel, integration_id: str) -> int: def create( - IntegrationModel, + IntegrationModel: type, created_by: str, integration_id: str, running_id: int, @@ -114,7 +118,7 @@ def create( def update( - IntegrationModel, + IntegrationModel: type, id: str, integration_id: str, updated_by: str, @@ -149,7 +153,7 @@ def update( def delete_many( - IntegrationModel, + IntegrationModel: type, ids: List[str], with_commit: bool = False, ) -> None: @@ -160,7 +164,7 @@ def delete_many( general.flush_or_commit(with_commit) -def clear_history(IntegrationModel, id: str, with_commit: bool = False) -> None: +def clear_history(IntegrationModel: type, id: str, with_commit: bool = False) -> None: integration_record = get_by_id(IntegrationModel, id) integration_record.delta_criteria = None general.add(integration_record, with_commit) From dc1e5e8242bfc8760fc389cf53918435b69e95f6 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 3 Jun 2025 15:23:19 +0200 Subject: [PATCH 046/114] perf: add sync columns --- cognition_objects/integration.py | 7 ++++++- models.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index ec43b459..4f465736 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -64,12 +64,12 @@ def create( integration_type: CognitionIntegrationType, integration_config: Dict, llm_config: Dict, - with_commit: bool = True, started_at: Optional[datetime.datetime] = None, created_at: Optional[datetime.datetime] = None, finished_at: Optional[datetime.datetime] = None, id: Optional[str] = None, project_id: Optional[str] = None, + with_commit: bool = True, ) -> CognitionIntegration: if state not in CognitionMarkdownFileState.all(): raise HTTPException(status_code=400, detail=f"Invalid state: {state}") @@ -105,6 +105,8 @@ def update( error_message: Optional[str] = None, started_at: Optional[datetime.datetime] = None, finished_at: Optional[datetime.datetime] = None, + last_synced_at: Optional[datetime.datetime] = None, + is_synced: Optional[bool] = None, with_commit: bool = True, ) -> CognitionIntegration: integration: CognitionIntegration = get_by_id(id) @@ -125,7 +127,10 @@ def update( integration.error_message = error_message if started_at is not None: integration.started_at = started_at + if last_synced_at is not None: + integration.last_synced_at = last_synced_at + integration.is_synced = is_synced integration.finished_at = finished_at general.add(integration, with_commit) diff --git a/models.py b/models.py index 29b4dea8..7d8ff3ce 100644 --- a/models.py +++ b/models.py @@ -2118,7 +2118,7 @@ class CognitionIntegration(Base): llm_config = Column(JSON) error_message = Column(String) - is_synced = Column(Boolean, default=False) + is_synced = Column(Boolean, nullable=True) last_synced_at = Column(DateTime) From 5a351161a47d273edfe2e278f911ab859b3cbf4d Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Tue, 3 Jun 2025 15:41:56 +0200 Subject: [PATCH 047/114] fix model --- models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models.py b/models.py index e80264e3..5062781b 100644 --- a/models.py +++ b/models.py @@ -1949,7 +1949,7 @@ class CognitionGroupMember(Base): id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) group_id = Column( UUID(as_uuid=True), - ForeignKey(f"{Tablenames.GROUP_MEMBER.value}.id", ondelete="CASCADE"), + ForeignKey(f"cognition.{Tablenames.GROUP.value}.id", ondelete="CASCADE"), index=True, ) user_id = Column( From 00c46d685075debb010c12fe3bf1bdcb36603ee0 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 3 Jun 2025 16:37:56 +0200 Subject: [PATCH 048/114] perf: add get_all integrations --- cognition_objects/integration.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index 4f465736..7b9d183d 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -31,6 +31,13 @@ def get( return query.order_by(CognitionIntegration.created_at).all() +def get_all(integration_type: Optional[str] = None) -> List[CognitionIntegration]: + query = session.query(CognitionIntegration) + if integration_type: + query = query.filter(CognitionIntegration.type == integration_type) + return query.order_by(CognitionIntegration.created_at).all() + + def get_all_by_project_id(project_id: str) -> List[CognitionIntegration]: return ( session.query(CognitionIntegration) From 08e04a62b62a5becc77062b00d01bf94866afc8f Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 3 Jun 2025 16:38:37 +0200 Subject: [PATCH 049/114] chore: add todo comment --- cognition_objects/integration.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index 7b9d183d..b2a802a0 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -31,6 +31,7 @@ def get( return query.order_by(CognitionIntegration.created_at).all() +# TODO: better approach for fetching all integrations to check for updates def get_all(integration_type: Optional[str] = None) -> List[CognitionIntegration]: query = session.query(CognitionIntegration) if integration_type: From 299a4aa8297c9a83d428931d7b88301189e39e66 Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Wed, 4 Jun 2025 11:06:51 +0200 Subject: [PATCH 050/114] permission test --- enums.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/enums.py b/enums.py index 0d3895b9..97e652e7 100644 --- a/enums.py +++ b/enums.py @@ -10,6 +10,7 @@ class DataTypes(Enum): TEXT = "TEXT" LLM_RESPONSE = "LLM_RESPONSE" EMBEDDING_LIST = "EMBEDDING_LIST" # only for embeddings & default hidden + PERMISSION = "PERMISSION" # used for access control UNKNOWN = "UNKNOWN" @@ -157,7 +158,8 @@ class Tablenames(Enum): FULL_ADMIN_ACCESS = "full_admin_access" GROUP = "group" # used for group based access control GROUP_MEMBER = "group_member" # used for group based access control - + PERMISSION = "permission" # used for access control + def snake_case_to_pascal_case(self): # the type name (written in PascalCase) of a table is needed to create backrefs return "".join([word.title() for word in self.value.split("_")]) From 995612b5d7fcb60297455499c189197aa55d9111 Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Wed, 4 Jun 2025 11:53:40 +0200 Subject: [PATCH 051/114] projects with access management --- business_objects/project.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/business_objects/project.py b/business_objects/project.py index d3d192b0..3e7098f7 100644 --- a/business_objects/project.py +++ b/business_objects/project.py @@ -11,6 +11,7 @@ from ..models import ( Project, Record, + Attribute ) from ..util import prevent_sql_injection @@ -155,6 +156,19 @@ def get_all(organization_id: str) -> List[Project]: ) +def get_all_with_access_management(organization_id: str) -> List[Project]: + return ( + session.query(Project) + .join(Attribute, Project.id == Attribute.project_id) + .filter( + Project.organization_id == organization_id, + Attribute.name.in_(["__ACCESS_GROUPS", "__ACCESS_USER"]), + ) + .distinct() + .all() + ) + + def get_all_by_user_organization_id(organization_id: str) -> List[Project]: projects = ( session.query(Project).filter(Project.organization_id == organization_id).all() From fad5cc41dab51481e6930238b4a5397d9e7efb34 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Wed, 4 Jun 2025 12:50:14 +0200 Subject: [PATCH 052/114] perf: add sharepoint db bo --- business_objects/project.py | 11 +++++++++++ integration_objects/sharepoint.py | 17 +++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 integration_objects/sharepoint.py diff --git a/business_objects/project.py b/business_objects/project.py index d3d192b0..5ba723ac 100644 --- a/business_objects/project.py +++ b/business_objects/project.py @@ -609,3 +609,14 @@ def get_project_by_project_id_sql(project_id: str) -> Dict[str, Any]: return value[0] else: return None + + +def get_by_name_and_org_id(name: str, organization_id: str) -> Optional[Project]: + return ( + session.query(Project) + .filter( + Project.name == name, + Project.organization_id == organization_id, + ) + .first() + ) diff --git a/integration_objects/sharepoint.py b/integration_objects/sharepoint.py new file mode 100644 index 00000000..0481745a --- /dev/null +++ b/integration_objects/sharepoint.py @@ -0,0 +1,17 @@ +from typing import Optional +from datetime import datetime +from sqlalchemy import func + +import pytz + +from submodules.model.session import session +from submodules.model.models import CognitionIntegration, IntegrationSharepoint + + +def get_modified_since(integration: CognitionIntegration) -> Optional[datetime]: + modified = ( + session.query(func.max(IntegrationSharepoint.modified)) + .filter(IntegrationSharepoint.integration_id == integration.id) + .first() + )[0] or datetime(1970, 1, 1) + return pytz.UTC.localize(modified) From 264d0fe4b2382a0e8a68248ae10927cae1c26533 Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Wed, 4 Jun 2025 14:24:24 +0200 Subject: [PATCH 053/114] deactivate mock up --- business_objects/record.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/business_objects/record.py b/business_objects/record.py index 7d51a5d5..7e9adc36 100644 --- a/business_objects/record.py +++ b/business_objects/record.py @@ -807,6 +807,25 @@ def delete_user_created_attribute( general.flush_or_commit(with_commit) +def delete_access_management_attributes( + project_id: str, with_commit: bool = True +) -> None: + access_groups_attribute_item = attribute.get_by_name(project_id, "__ACCESS_GROUPS") + access_users_attribute_item = attribute.get_by_name(project_id, "__ACCESS_USERS") + + if access_users_attribute_item and access_groups_attribute_item: + record_items = get_all(project_id=project_id) + for i, record_item in enumerate(record_items): + if record_item.data.get(access_groups_attribute_item.name): + del record_item.data[access_groups_attribute_item.name] + if record_item.data.get(access_users_attribute_item.name): + del record_item.data[access_users_attribute_item.name] + flag_modified(record_item, "data") + if (i + 1) % 1000 == 0: + general.flush_or_commit(with_commit) + general.flush_or_commit(with_commit) + + def delete_duplicated_rats(with_commit: bool = False) -> None: # no project so run for all to prevent expensive join with record table query = """ From f8db2cf7bb8839054904b24494e16a2d755fec53 Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Wed, 4 Jun 2025 16:45:45 +0200 Subject: [PATCH 054/114] get by user id --- business_objects/project.py | 15 ++++++++++++++- cognition_objects/group_member.py | 4 ++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/business_objects/project.py b/business_objects/project.py index 3e7098f7..23390f42 100644 --- a/business_objects/project.py +++ b/business_objects/project.py @@ -162,13 +162,26 @@ def get_all_with_access_management(organization_id: str) -> List[Project]: .join(Attribute, Project.id == Attribute.project_id) .filter( Project.organization_id == organization_id, - Attribute.name.in_(["__ACCESS_GROUPS", "__ACCESS_USER"]), + Attribute.name.in_(["__ACCESS_GROUPS", "__ACCESS_USERS"]), ) .distinct() .all() ) +def check_access_management_active(project_id: str) -> bool: + return ( + session.query(Project) + .join(Attribute, Project.id == Attribute.project_id) + .filter( + Project.id == project_id, + Attribute.name.in_(["__ACCESS_GROUPS", "__ACCESS_USERS"]), + ) + .count() + > 0 + ) + + def get_all_by_user_organization_id(organization_id: str) -> List[Project]: projects = ( session.query(Project).filter(Project.organization_id == organization_id).all() diff --git a/cognition_objects/group_member.py b/cognition_objects/group_member.py index f221c1f2..a5c25344 100644 --- a/cognition_objects/group_member.py +++ b/cognition_objects/group_member.py @@ -22,6 +22,10 @@ def get_by_group_and_user(group_id: str, user_id: str) -> CognitionGroupMember: ) +def get_by_user_id(user_id: str) -> list: + return session.query(CognitionGroupMember).filter(CognitionGroupMember.user_id == user_id).all() + + def get_all_by_group(group_id: str) -> list: return session.query(CognitionGroupMember).filter(CognitionGroupMember.group_id == group_id).all() From 2b86f5e21b96c1c4075df504b25629e3446f463e Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Wed, 4 Jun 2025 17:49:56 +0200 Subject: [PATCH 055/114] enable list payload --- business_objects/embedding.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/business_objects/embedding.py b/business_objects/embedding.py index 1e02d222..0b770ca6 100644 --- a/business_objects/embedding.py +++ b/business_objects/embedding.py @@ -321,8 +321,11 @@ def __build_payload_selector( if ( data_type != enums.DataTypes.TEXT.value and data_type != enums.DataTypes.LLM_RESPONSE.value + and data_type != enums.DataTypes.PERMISSION.value ): payload_selector += f"'{attr}', (r.\"data\"->>'{attr}')::{data_type}" + if data_type == enums.DataTypes.PERMISSION.value: + payload_selector += f"'{attr}', r.\"data\"->'{attr}'" else: payload_selector += f"'{attr}', r.\"data\"->>'{attr}'" payload_selector = f"json_build_object({payload_selector}) payload" From 19768ddb665a03fb997f6203ea54c985f6874914 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 5 Jun 2025 01:33:31 +0200 Subject: [PATCH 056/114] perf: integration update --- cognition_objects/integration.py | 44 ++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index b2a802a0..c0e2b7aa 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -20,7 +20,15 @@ def get_by_id(id: str) -> CognitionIntegration: ) -def get( +# TODO: better approach for fetching all integrations to check for updates +def get_all(integration_type: Optional[str] = None) -> List[CognitionIntegration]: + query = session.query(CognitionIntegration) + if integration_type: + query = query.filter(CognitionIntegration.type == integration_type) + return query.order_by(CognitionIntegration.created_at).all() + + +def get_all_in_org( org_id: str, integration_type: Optional[str] = None ) -> List[CognitionIntegration]: query = session.query(CognitionIntegration).filter( @@ -31,12 +39,38 @@ def get( return query.order_by(CognitionIntegration.created_at).all() -# TODO: better approach for fetching all integrations to check for updates -def get_all(integration_type: Optional[str] = None) -> List[CognitionIntegration]: - query = session.query(CognitionIntegration) +def get_all_in_org_paginated( + org_id: str, + integration_type: Optional[str] = None, + page: int = 1, + page_size: int = 10, +) -> List[CognitionIntegration]: + schema_name = CognitionIntegration.__table__.schema or "public" + table_name = f"{schema_name}.{CognitionIntegration.__tablename__}" + + first_page = (page - 1) * page_size + last_page = page * page_size + + sql = f""" + SELECT id FROM ( + SELECT + ROW_NUMBER () OVER(PARTITION BY intg.id ORDER BY intg.created_at ASC) rn, + intg.id + FROM {table_name} intg + WHERE intg.organization_id = '{org_id}' + ) pages + WHERE rn BETWEEN {first_page} AND {last_page} + """ + integration_ids = general.execute_all(sql) + if not integration_ids: + return [] + + query = session.query(CognitionIntegration).filter( + CognitionIntegration.id.in_([row[0] for row in integration_ids]) + ) if integration_type: query = query.filter(CognitionIntegration.type == integration_type) - return query.order_by(CognitionIntegration.created_at).all() + return query.order_by(CognitionIntegration.created_at.desc()).all() def get_all_by_project_id(project_id: str) -> List[CognitionIntegration]: From 72319528d9873de0dbc3b62591568615f2b2cf66 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 5 Jun 2025 17:41:34 +0200 Subject: [PATCH 057/114] perf: tech discussion feedback --- business_objects/record.py | 18 +++++++++++++++++- cognition_objects/integration.py | 1 - enums.py | 4 ++-- integration_objects/__init__.py | 11 ++++++----- 4 files changed, 25 insertions(+), 9 deletions(-) diff --git a/business_objects/record.py b/business_objects/record.py index b4c47c10..7236eafd 100644 --- a/business_objects/record.py +++ b/business_objects/record.py @@ -1,6 +1,6 @@ from __future__ import with_statement from typing import List, Dict, Any, Optional, Tuple, Iterable -from sqlalchemy import cast, Text +from sqlalchemy import cast, Text, String from sqlalchemy.orm.attributes import flag_modified from sqlalchemy.sql.expression import bindparam from sqlalchemy import update @@ -925,3 +925,19 @@ def get_first_no_text_column(project_id: str, record_id: str) -> str: WHERE r.project_id = '{project_id}' AND r.id = '{record_id}' """ return general.execute_first(query)[0] + + +def get_record_ids_by_running_ids(project_id: str, running_ids: List[int]) -> List[str]: + return [ + row[0] + for row in ( + session.query(cast(Record.id, String)) + .filter( + Record.project_id == project_id, + Record.data[attribute.get_running_id_name(project_id)] + .as_integer() + .in_(running_ids), + ) + .all() + ) + ] diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index c0e2b7aa..b188cd40 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -20,7 +20,6 @@ def get_by_id(id: str) -> CognitionIntegration: ) -# TODO: better approach for fetching all integrations to check for updates def get_all(integration_type: Optional[str] = None) -> List[CognitionIntegration]: query = session.query(CognitionIntegration) if integration_type: diff --git a/enums.py b/enums.py index 69bc3d6e..4da99fae 100644 --- a/enums.py +++ b/enums.py @@ -931,10 +931,10 @@ class IntegrationMetadata(Enum): Example: IntegrationMetadata.from_string("PDF") - # returns: {"source", "delta_criteria", "minio_file_name", "file_path", "page", "total_pages", "title"} + # returns: {"source", "minio_file_name", "file_path", "page", "total_pages", "title"} """ - __DEFAULT__ = {"source", "delta_criteria", "minio_file_name"} + __DEFAULT__ = {"source", "minio_file_name", "running_id"} GITHUB_FILE = {"path", "sha", "code_language"} GITHUB_ISSUE = {"url", "state", "number", "assignee", "milestone"} diff --git a/integration_objects/__init__.py b/integration_objects/__init__.py index c09f1c86..8910d3b2 100644 --- a/integration_objects/__init__.py +++ b/integration_objects/__init__.py @@ -102,14 +102,13 @@ def create( with_commit: bool = True, **metadata, ) -> object: - kwargs = __get_supported_metadata(IntegrationModel.__tablename__, metadata) integration_record = IntegrationModel( created_by=created_by, integration_id=integration_id, running_id=running_id, created_at=created_at, id=id, - **kwargs, + **metadata, ) general.add(integration_record, with_commit) @@ -135,8 +134,7 @@ def update( integration_record.updated_at = updated_at record_updated = False - kwargs = __get_supported_metadata(IntegrationModel.__tablename__, metadata) - for key, value in kwargs.items(): + for key, value in metadata.items(): if not hasattr(integration_record, key): raise ValueError( f"Invalid field '{key}' for {IntegrationModel.__tablename__}" @@ -174,7 +172,10 @@ def __get_supported_metadata( table_name: str, metadata: Dict[str, Union[str, int, float, bool]] ) -> None: supported_keys = IntegrationMetadata.from_table_name(table_name) - return {key: metadata[key] for key in supported_keys.intersection(metadata.keys())} + supported_metadata = { + key: metadata[key] for key in supported_keys.intersection(metadata.keys()) + } + return __rename_metadata(table_name, supported_metadata) def __rename_metadata( From 7e88d90d1895f0887631552134dd2eb63bd971cc Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Mon, 9 Jun 2025 10:45:46 +0200 Subject: [PATCH 058/114] perf: get integrations updates --- cognition_objects/integration.py | 6 +++--- integration_objects/__init__.py | 19 ++++++++++--------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index b188cd40..17fc766c 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -24,7 +24,7 @@ def get_all(integration_type: Optional[str] = None) -> List[CognitionIntegration query = session.query(CognitionIntegration) if integration_type: query = query.filter(CognitionIntegration.type == integration_type) - return query.order_by(CognitionIntegration.created_at).all() + return query.order_by(CognitionIntegration.created_at.desc()).all() def get_all_in_org( @@ -35,7 +35,7 @@ def get_all_in_org( ) if integration_type: query = query.filter(CognitionIntegration.type == integration_type) - return query.order_by(CognitionIntegration.created_at).all() + return query.order_by(CognitionIntegration.created_at.desc()).all() def get_all_in_org_paginated( @@ -78,7 +78,7 @@ def get_all_by_project_id(project_id: str) -> List[CognitionIntegration]: .filter( CognitionIntegration.project_id == project_id, ) - .order_by(CognitionIntegration.created_at.asc()) + .order_by(CognitionIntegration.created_at.desc()) .all() ) diff --git a/integration_objects/__init__.py b/integration_objects/__init__.py index 8910d3b2..50b3f1d5 100644 --- a/integration_objects/__init__.py +++ b/integration_objects/__init__.py @@ -9,15 +9,16 @@ from ..enums import IntegrationMetadata -def get(IntegrationModel: type, id: str, integration_id: str) -> object: - return ( - session.query(IntegrationModel) - .filter( - IntegrationModel.id == id, - IntegrationModel.integration_id == integration_id, - ) - .first() +def get( + IntegrationModel: type, integration_id: str, id: Optional[str] = None +) -> object: + query = session.query(IntegrationModel).filter( + IntegrationModel.integration_id == integration_id, ) + if id is not None: + query = query.filter(IntegrationModel.id == id) + return query.first() + return query.order_by(IntegrationModel.created_at.desc()).all() def get_by_id(IntegrationModel: type, id: str) -> object: @@ -125,7 +126,7 @@ def update( updated_at: Optional[datetime] = None, **metadata, ) -> object: - integration_record = get(IntegrationModel, id, integration_id) + integration_record = get(IntegrationModel, integration_id, id) integration_record.updated_by = updated_by if running_id is not None: From 57bfe5d6dfbf557a660e17fb6100ca46b8b42c35 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 10 Jun 2025 11:28:26 +0200 Subject: [PATCH 059/114] perf: integration updates --- cognition_objects/integration.py | 2 +- util.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index 17fc766c..ff34947f 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -83,7 +83,7 @@ def get_all_by_project_id(project_id: str) -> List[CognitionIntegration]: ) -def count_org_integrations(org_id: str) -> int: +def count_org_integrations(org_id: str) -> Dict[str, int]: counts = ( session.query(CognitionIntegration.type, func.count(CognitionIntegration.id)) .filter( diff --git a/util.py b/util.py index 37a80dff..71bc856f 100644 --- a/util.py +++ b/util.py @@ -208,6 +208,13 @@ def to_camel_case(name: str): return "".join([name[0].lower(), name[1:]]) +def to_snake_case(name: str): + if not is_camel_case(name): + return name + name = sub("(.)([A-Z][a-z]+)", r"\1_\2", name) + return sub("([a-z0-9])([A-Z])", r"\1_\2", name).lower() + + def is_list_like(value: Any) -> bool: return ( isinstance(value, collections_abc_Iterable) From c67bae4a2c3c7a69aa6fa9206db68f1d106425a2 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Wed, 11 Jun 2025 23:38:07 +0200 Subject: [PATCH 060/114] perf: introduce managers --- cognition_objects/integration.py | 19 ++- integration_objects/__init__.py | 192 --------------------------- integration_objects/manager.py | 212 ++++++++++++++++++++++++++++++ integration_objects/sharepoint.py | 17 --- 4 files changed, 224 insertions(+), 216 deletions(-) create mode 100644 integration_objects/manager.py delete mode 100644 integration_objects/sharepoint.py diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index ff34947f..ccdb75a6 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -83,6 +83,18 @@ def get_all_by_project_id(project_id: str) -> List[CognitionIntegration]: ) +def get_last_synced_at( + org_id: str, integration_type: Optional[str] = None +) -> List[CognitionIntegration]: + query = session.query(func.max(CognitionIntegration.last_synced_at)).filter( + CognitionIntegration.organization_id == org_id + ) + if integration_type: + query = query.filter(CognitionIntegration.type == integration_type) + result = query.first() + return result[0] if result else None + + def count_org_integrations(org_id: str) -> Dict[str, int]: counts = ( session.query(CognitionIntegration.type, func.count(CognitionIntegration.id)) @@ -194,13 +206,6 @@ def execution_finished(id: str) -> bool: ) -def clear_history(id: str) -> None: - integration: CognitionIntegration = get_by_id(id) - integration.extract_history = {} - integration.state = CognitionMarkdownFileState.QUEUE.value - general.add(integration, True) - - def delete_many( ids: List[str], delete_refinery_projects: bool = False, with_commit: bool = True ) -> None: diff --git a/integration_objects/__init__.py b/integration_objects/__init__.py index 50b3f1d5..e69de29b 100644 --- a/integration_objects/__init__.py +++ b/integration_objects/__init__.py @@ -1,192 +0,0 @@ -from typing import List, Optional, Dict, Union - -from sqlalchemy import func -from datetime import datetime - -from ..business_objects import general -from ..cognition_objects import integration as integration_db_bo -from ..session import session -from ..enums import IntegrationMetadata - - -def get( - IntegrationModel: type, integration_id: str, id: Optional[str] = None -) -> object: - query = session.query(IntegrationModel).filter( - IntegrationModel.integration_id == integration_id, - ) - if id is not None: - query = query.filter(IntegrationModel.id == id) - return query.first() - return query.order_by(IntegrationModel.created_at.desc()).all() - - -def get_by_id(IntegrationModel: type, id: str) -> object: - return session.query(IntegrationModel).filter(IntegrationModel.id == id).first() - - -def get_by_running_id( - IntegrationModel: type, integration_id: str, running_id: int -) -> object: - return ( - session.query(IntegrationModel) - .filter( - IntegrationModel.integration_id == integration_id, - IntegrationModel.running_id == running_id, - ) - .first() - ) - - -def get_by_source(IntegrationModel: type, integration_id: str, source: str) -> object: - return ( - session.query(IntegrationModel) - .filter( - IntegrationModel.integration_id == integration_id, - IntegrationModel.source == source, - ) - .first() - ) - - -def get_all_by_integration_id( - IntegrationModel: type, integration_id: str -) -> List[object]: - return ( - session.query(IntegrationModel) - .filter(IntegrationModel.integration_id == integration_id) - .order_by(IntegrationModel.created_at) - .all() - ) - - -def get_all_by_project_id(IntegrationModel: type, project_id: str) -> List[object]: - integrations = integration_db_bo.get_all_by_project_id(project_id) - return ( - session.query(IntegrationModel) - .filter( - IntegrationModel.integration_id.in_([i.id for i in integrations]), - ) - .order_by(IntegrationModel.created_at.asc()) - .all() - ) - - -def get_existing_integration_records( - IntegrationModel, integration_id: str -) -> Dict[str, object]: - return { - integration.source: integration - for integration in get_all_by_integration_id(IntegrationModel, integration_id) - } - - -def get_running_ids(IntegrationModel: type, integration_id: str) -> int: - return dict( - session.query( - IntegrationModel.source, - func.coalesce(func.max(IntegrationModel.running_id), 0), - ) - .filter(IntegrationModel.integration_id == integration_id) - .group_by(IntegrationModel.source) - .all() - ) - - -def create( - IntegrationModel: type, - created_by: str, - integration_id: str, - running_id: int, - created_at: Optional[datetime] = None, - id: Optional[str] = None, - with_commit: bool = True, - **metadata, -) -> object: - integration_record = IntegrationModel( - created_by=created_by, - integration_id=integration_id, - running_id=running_id, - created_at=created_at, - id=id, - **metadata, - ) - - general.add(integration_record, with_commit) - - return integration_record - - -def update( - IntegrationModel: type, - id: str, - integration_id: str, - updated_by: str, - running_id: Optional[int] = None, - updated_at: Optional[datetime] = None, - **metadata, -) -> object: - integration_record = get(IntegrationModel, integration_id, id) - integration_record.updated_by = updated_by - - if running_id is not None: - integration_record.running_id = running_id - if updated_at is not None: - integration_record.updated_at = updated_at - - record_updated = False - for key, value in metadata.items(): - if not hasattr(integration_record, key): - raise ValueError( - f"Invalid field '{key}' for {IntegrationModel.__tablename__}" - ) - existing_value = getattr(integration_record, key, None) - if value is not None and value != existing_value: - setattr(integration_record, key, value) - record_updated = True - - if record_updated: - general.add(integration_record, with_commit=True) - - return integration_record - - -def delete_many( - IntegrationModel: type, - ids: List[str], - with_commit: bool = False, -) -> None: - integration_records = session.query(IntegrationModel).filter( - IntegrationModel.id.in_(ids) - ) - integration_records.delete(synchronize_session=False) - general.flush_or_commit(with_commit) - - -def clear_history(IntegrationModel: type, id: str, with_commit: bool = False) -> None: - integration_record = get_by_id(IntegrationModel, id) - integration_record.delta_criteria = None - general.add(integration_record, with_commit) - - -def __get_supported_metadata( - table_name: str, metadata: Dict[str, Union[str, int, float, bool]] -) -> None: - supported_keys = IntegrationMetadata.from_table_name(table_name) - supported_metadata = { - key: metadata[key] for key in supported_keys.intersection(metadata.keys()) - } - return __rename_metadata(table_name, supported_metadata) - - -def __rename_metadata( - table_name: str, metadata: Dict[str, Union[str, int, float, bool]] -) -> Dict[str, object]: - rename_keys = { - "id": f"{table_name}_id", - "created_by": f"{table_name}_created_by", - "created_at": f"{table_name}_created_at", - "updated_by": f"{table_name}_updated_by", - "updated_at": f"{table_name}_updated_at", - } - return {rename_keys.get(key, key): value for key, value in metadata.items()} diff --git a/integration_objects/manager.py b/integration_objects/manager.py new file mode 100644 index 00000000..e68de9dc --- /dev/null +++ b/integration_objects/manager.py @@ -0,0 +1,212 @@ +from typing import Optional +from datetime import datetime +from sqlalchemy import func + +import pytz + +from submodules.model.session import session +from submodules.model.models import CognitionIntegration, IntegrationSharepoint + + +def get_modified_since(integration: CognitionIntegration) -> Optional[datetime]: + modified = ( + session.query(func.max(IntegrationSharepoint.modified)) + .filter(IntegrationSharepoint.integration_id == integration.id) + .first() + )[0] or datetime(1970, 1, 1) + return pytz.UTC.localize(modified) + + +from typing import List, Optional, Dict, Union + +from sqlalchemy import func +from datetime import datetime + +from ..business_objects import general +from ..cognition_objects import integration as integration_db_bo +from ..session import session +from ..enums import IntegrationMetadata + + +def get( + IntegrationModel: type, integration_id: str, id: Optional[str] = None +) -> object: + query = session.query(IntegrationModel).filter( + IntegrationModel.integration_id == integration_id, + ) + if id is not None: + query = query.filter(IntegrationModel.id == id) + return query.first() + return query.order_by(IntegrationModel.created_at.desc()).all() + + +def get_by_id(IntegrationModel: type, id: str) -> object: + return session.query(IntegrationModel).filter(IntegrationModel.id == id).first() + + +def get_by_running_id( + IntegrationModel: type, integration_id: str, running_id: int +) -> object: + return ( + session.query(IntegrationModel) + .filter( + IntegrationModel.integration_id == integration_id, + IntegrationModel.running_id == running_id, + ) + .first() + ) + + +def get_by_source(IntegrationModel: type, integration_id: str, source: str) -> object: + return ( + session.query(IntegrationModel) + .filter( + IntegrationModel.integration_id == integration_id, + IntegrationModel.source == source, + ) + .first() + ) + + +def get_all_by_integration_id( + IntegrationModel: type, integration_id: str +) -> List[object]: + return ( + session.query(IntegrationModel) + .filter(IntegrationModel.integration_id == integration_id) + .order_by(IntegrationModel.created_at) + .all() + ) + + +def get_all_by_project_id(IntegrationModel: type, project_id: str) -> List[object]: + integrations = integration_db_bo.get_all_by_project_id(project_id) + return ( + session.query(IntegrationModel) + .filter( + IntegrationModel.integration_id.in_([i.id for i in integrations]), + ) + .order_by(IntegrationModel.created_at.asc()) + .all() + ) + + +def get_existing_integration_records( + IntegrationModel, integration_id: str +) -> Dict[str, object]: + return { + record.source: record + for record in get_all_by_integration_id(IntegrationModel, integration_id) + } + + +def get_running_ids(IntegrationModel: type, integration_id: str) -> int: + return dict( + session.query( + IntegrationModel.source, + func.coalesce(func.max(IntegrationModel.running_id), 0), + ) + .filter(IntegrationModel.integration_id == integration_id) + .group_by(IntegrationModel.source) + .all() + ) + + +def create( + IntegrationModel: type, + created_by: str, + integration_id: str, + running_id: int, + created_at: Optional[datetime] = None, + id: Optional[str] = None, + with_commit: bool = True, + **metadata, +) -> object: + integration_record = IntegrationModel( + created_by=created_by, + integration_id=integration_id, + running_id=running_id, + created_at=created_at, + id=id, + **metadata, + ) + + general.add(integration_record, with_commit) + + return integration_record + + +def update( + IntegrationModel: type, + id: str, + integration_id: str, + updated_by: str, + running_id: Optional[int] = None, + updated_at: Optional[datetime] = None, + with_commit: bool = True, + **metadata, +) -> object: + integration_record = get(IntegrationModel, integration_id, id) + integration_record.updated_by = updated_by + + if running_id is not None: + integration_record.running_id = running_id + if updated_at is not None: + integration_record.updated_at = updated_at + + record_updated = False + for key, value in metadata.items(): + if not hasattr(integration_record, key): + raise ValueError( + f"Invalid field '{key}' for {IntegrationModel.__tablename__}" + ) + existing_value = getattr(integration_record, key, None) + if value is not None and value != existing_value: + setattr(integration_record, key, value) + record_updated = True + + if record_updated: + general.add(integration_record, with_commit=with_commit) + + return integration_record + + +def delete_many( + IntegrationModel: type, + ids: List[str], + with_commit: bool = False, +) -> None: + integration_records = session.query(IntegrationModel).filter( + IntegrationModel.id.in_(ids) + ) + integration_records.delete(synchronize_session=False) + general.flush_or_commit(with_commit) + + +def clear_history(IntegrationModel: type, id: str, with_commit: bool = False) -> None: + integration_record = get_by_id(IntegrationModel, id) + integration_record.delta_criteria = None + general.add(integration_record, with_commit) + + +def _get_supported_metadata( + table_name: str, metadata: Dict[str, Union[str, int, float, bool]] +) -> None: + supported_keys = IntegrationMetadata.from_table_name(table_name) + supported_metadata = { + key: metadata[key] for key in supported_keys.intersection(metadata.keys()) + } + return __rename_metadata(table_name, supported_metadata) + + +def __rename_metadata( + table_name: str, metadata: Dict[str, Union[str, int, float, bool]] +) -> Dict[str, object]: + rename_keys = { + "id": f"{table_name}_id", + "created_by": f"{table_name}_created_by", + "created_at": f"{table_name}_created_at", + "updated_by": f"{table_name}_updated_by", + "updated_at": f"{table_name}_updated_at", + } + return {rename_keys.get(key, key): value for key, value in metadata.items()} diff --git a/integration_objects/sharepoint.py b/integration_objects/sharepoint.py deleted file mode 100644 index 0481745a..00000000 --- a/integration_objects/sharepoint.py +++ /dev/null @@ -1,17 +0,0 @@ -from typing import Optional -from datetime import datetime -from sqlalchemy import func - -import pytz - -from submodules.model.session import session -from submodules.model.models import CognitionIntegration, IntegrationSharepoint - - -def get_modified_since(integration: CognitionIntegration) -> Optional[datetime]: - modified = ( - session.query(func.max(IntegrationSharepoint.modified)) - .filter(IntegrationSharepoint.integration_id == integration.id) - .first() - )[0] or datetime(1970, 1, 1) - return pytz.UTC.localize(modified) From 0373a58f73adeb4da78a5259f8ab6c6c1e1929ac Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Wed, 11 Jun 2025 23:43:41 +0200 Subject: [PATCH 061/114] chore: typing --- integration_objects/manager.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/integration_objects/manager.py b/integration_objects/manager.py index e68de9dc..2de96426 100644 --- a/integration_objects/manager.py +++ b/integration_objects/manager.py @@ -1,4 +1,4 @@ -from typing import Optional +from typing import Optional, Type from datetime import datetime from sqlalchemy import func @@ -29,7 +29,7 @@ def get_modified_since(integration: CognitionIntegration) -> Optional[datetime]: def get( - IntegrationModel: type, integration_id: str, id: Optional[str] = None + IntegrationModel: Type, integration_id: str, id: Optional[str] = None ) -> object: query = session.query(IntegrationModel).filter( IntegrationModel.integration_id == integration_id, @@ -40,12 +40,12 @@ def get( return query.order_by(IntegrationModel.created_at.desc()).all() -def get_by_id(IntegrationModel: type, id: str) -> object: +def get_by_id(IntegrationModel: Type, id: str) -> object: return session.query(IntegrationModel).filter(IntegrationModel.id == id).first() def get_by_running_id( - IntegrationModel: type, integration_id: str, running_id: int + IntegrationModel: Type, integration_id: str, running_id: int ) -> object: return ( session.query(IntegrationModel) @@ -57,7 +57,7 @@ def get_by_running_id( ) -def get_by_source(IntegrationModel: type, integration_id: str, source: str) -> object: +def get_by_source(IntegrationModel: Type, integration_id: str, source: str) -> object: return ( session.query(IntegrationModel) .filter( @@ -69,7 +69,7 @@ def get_by_source(IntegrationModel: type, integration_id: str, source: str) -> o def get_all_by_integration_id( - IntegrationModel: type, integration_id: str + IntegrationModel: Type, integration_id: str ) -> List[object]: return ( session.query(IntegrationModel) @@ -79,7 +79,7 @@ def get_all_by_integration_id( ) -def get_all_by_project_id(IntegrationModel: type, project_id: str) -> List[object]: +def get_all_by_project_id(IntegrationModel: Type, project_id: str) -> List[object]: integrations = integration_db_bo.get_all_by_project_id(project_id) return ( session.query(IntegrationModel) @@ -100,7 +100,7 @@ def get_existing_integration_records( } -def get_running_ids(IntegrationModel: type, integration_id: str) -> int: +def get_running_ids(IntegrationModel: Type, integration_id: str) -> int: return dict( session.query( IntegrationModel.source, @@ -113,7 +113,7 @@ def get_running_ids(IntegrationModel: type, integration_id: str) -> int: def create( - IntegrationModel: type, + IntegrationModel: Type, created_by: str, integration_id: str, running_id: int, @@ -137,7 +137,7 @@ def create( def update( - IntegrationModel: type, + IntegrationModel: Type, id: str, integration_id: str, updated_by: str, @@ -172,7 +172,7 @@ def update( def delete_many( - IntegrationModel: type, + IntegrationModel: Type, ids: List[str], with_commit: bool = False, ) -> None: @@ -183,7 +183,7 @@ def delete_many( general.flush_or_commit(with_commit) -def clear_history(IntegrationModel: type, id: str, with_commit: bool = False) -> None: +def clear_history(IntegrationModel: Type, id: str, with_commit: bool = False) -> None: integration_record = get_by_id(IntegrationModel, id) integration_record.delta_criteria = None general.add(integration_record, with_commit) From 0bdebd5389fa7cc824c3c099ed3c39490fc0ceee Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 12 Jun 2025 01:20:28 +0200 Subject: [PATCH 062/114] perf: access + check for updates --- cognition_objects/integration.py | 18 ++++++++++++++++-- cognition_objects/integration_access.py | 8 ++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index ccdb75a6..118f83fe 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -20,21 +20,35 @@ def get_by_id(id: str) -> CognitionIntegration: ) -def get_all(integration_type: Optional[str] = None) -> List[CognitionIntegration]: +def get_all( + integration_type: Optional[str] = None, + exclude_failed: Optional[bool] = False, + only_synced: Optional[bool] = False, +) -> List[CognitionIntegration]: query = session.query(CognitionIntegration) if integration_type: query = query.filter(CognitionIntegration.type == integration_type) + if exclude_failed: + query = query.filter( + CognitionIntegration.state != CognitionMarkdownFileState.FAILED.value + ) + if only_synced: + query = query.filter(CognitionIntegration.is_synced == True) return query.order_by(CognitionIntegration.created_at.desc()).all() def get_all_in_org( - org_id: str, integration_type: Optional[str] = None + org_id: str, + integration_type: Optional[str] = None, + only_synced: Optional[bool] = False, ) -> List[CognitionIntegration]: query = session.query(CognitionIntegration).filter( CognitionIntegration.organization_id == org_id ) if integration_type: query = query.filter(CognitionIntegration.type == integration_type) + if only_synced: + query = query.filter(CognitionIntegration.is_synced == True) return query.order_by(CognitionIntegration.created_at.desc()).all() diff --git a/cognition_objects/integration_access.py b/cognition_objects/integration_access.py index 522239cf..cd5755ae 100644 --- a/cognition_objects/integration_access.py +++ b/cognition_objects/integration_access.py @@ -37,6 +37,14 @@ def get( ) +def get_all() -> List[CognitionIntegrationAccess]: + return ( + session.query(CognitionIntegrationAccess) + .order_by(CognitionIntegrationAccess.created_at.desc()) + .all() + ) + + def create( org_id: str, user_id: str, From ffa35bf0abd3e1bb7100b92350131eb18e15b593 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 13 Jun 2025 09:26:49 +0200 Subject: [PATCH 063/114] perf: update integration --- cognition_objects/integration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index 118f83fe..b19708e8 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -99,7 +99,7 @@ def get_all_by_project_id(project_id: str) -> List[CognitionIntegration]: def get_last_synced_at( org_id: str, integration_type: Optional[str] = None -) -> List[CognitionIntegration]: +) -> datetime.datetime: query = session.query(func.max(CognitionIntegration.last_synced_at)).filter( CognitionIntegration.organization_id == org_id ) From 342b221b7b2aa7a2f437b308e2b642f853ab5bac Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 13 Jun 2025 12:10:19 +0200 Subject: [PATCH 064/114] perf: add delta url to sharepoint integration --- integration_objects/manager.py | 3 +++ models.py | 1 + 2 files changed, 4 insertions(+) diff --git a/integration_objects/manager.py b/integration_objects/manager.py index 2de96426..16f4f50d 100644 --- a/integration_objects/manager.py +++ b/integration_objects/manager.py @@ -143,6 +143,7 @@ def update( updated_by: str, running_id: Optional[int] = None, updated_at: Optional[datetime] = None, + delta_url: Optional[str] = None, with_commit: bool = True, **metadata, ) -> object: @@ -153,6 +154,8 @@ def update( integration_record.running_id = running_id if updated_at is not None: integration_record.updated_at = updated_at + if delta_url is not None: + integration_record.delta_url = delta_url record_updated = False for key, value in metadata.items(): diff --git a/models.py b/models.py index 7d8ff3ce..cae24496 100644 --- a/models.py +++ b/models.py @@ -2294,6 +2294,7 @@ class IntegrationSharepoint(Base): ForeignKey(f"cognition.{Tablenames.INTEGRATION.value}.id", ondelete="CASCADE"), index=True, ) + delta_url = Column(String) running_id = Column(Integer, index=True) source = Column(String, index=True) minio_file_name = Column(String) From c9f4791a85b50e1b237a7c066f98e4ad8ba56b47 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 13 Jun 2025 12:32:02 +0200 Subject: [PATCH 065/114] fix: move delta_url to cognitionintegration --- models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models.py b/models.py index cae24496..e737cacc 100644 --- a/models.py +++ b/models.py @@ -2120,6 +2120,7 @@ class CognitionIntegration(Base): error_message = Column(String) is_synced = Column(Boolean, nullable=True) last_synced_at = Column(DateTime) + delta_url = Column(String) class CognitionIntegrationAccess(Base): @@ -2294,7 +2295,6 @@ class IntegrationSharepoint(Base): ForeignKey(f"cognition.{Tablenames.INTEGRATION.value}.id", ondelete="CASCADE"), index=True, ) - delta_url = Column(String) running_id = Column(Integer, index=True) source = Column(String, index=True) minio_file_name = Column(String) From 02ad8cc4291e37ce0d512daaf04f15146e5f20f1 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 13 Jun 2025 16:53:55 +0200 Subject: [PATCH 066/114] perf: integration updates --- cognition_objects/integration.py | 3 +++ integration_objects/manager.py | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index b19708e8..5c210bd7 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -174,6 +174,7 @@ def update( finished_at: Optional[datetime.datetime] = None, last_synced_at: Optional[datetime.datetime] = None, is_synced: Optional[bool] = None, + delta_url: Optional[str] = None, with_commit: bool = True, ) -> CognitionIntegration: integration: CognitionIntegration = get_by_id(id) @@ -196,6 +197,8 @@ def update( integration.started_at = started_at if last_synced_at is not None: integration.last_synced_at = last_synced_at + if delta_url is not None: + integration.delta_url = delta_url integration.is_synced = is_synced integration.finished_at = finished_at diff --git a/integration_objects/manager.py b/integration_objects/manager.py index 16f4f50d..2de96426 100644 --- a/integration_objects/manager.py +++ b/integration_objects/manager.py @@ -143,7 +143,6 @@ def update( updated_by: str, running_id: Optional[int] = None, updated_at: Optional[datetime] = None, - delta_url: Optional[str] = None, with_commit: bool = True, **metadata, ) -> object: @@ -154,8 +153,6 @@ def update( integration_record.running_id = running_id if updated_at is not None: integration_record.updated_at = updated_at - if delta_url is not None: - integration_record.delta_url = delta_url record_updated = False for key, value in metadata.items(): From 7c023eee7fabd4f2e0e0aae64d673ed2be9f8c39 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Mon, 16 Jun 2025 15:04:52 +0200 Subject: [PATCH 067/114] perf: add updated_by + delta_criteria --- cognition_objects/integration.py | 11 ++++++++--- models.py | 8 +++++++- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index 5c210bd7..c10e7198 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -145,6 +145,7 @@ def create( organization_id=org_id, project_id=project_id, created_by=user_id, + updated_by=user_id, created_at=created_at, started_at=started_at, finished_at=finished_at, @@ -155,6 +156,7 @@ def create( type=integration_type.value, config=integration_config, llm_config=llm_config, + delta_criteria={"delta_url": None}, ) general.add(integration, with_commit) @@ -163,6 +165,7 @@ def create( def update( id: str, + updated_by: Optional[str] = None, name: Optional[str] = None, description: Optional[str] = None, tokenizer: Optional[str] = None, @@ -174,11 +177,13 @@ def update( finished_at: Optional[datetime.datetime] = None, last_synced_at: Optional[datetime.datetime] = None, is_synced: Optional[bool] = None, - delta_url: Optional[str] = None, + delta_criteria: Optional[Dict[str, str]] = None, with_commit: bool = True, ) -> CognitionIntegration: integration: CognitionIntegration = get_by_id(id) + if updated_by is not None: + integration.updated_by = updated_by if name is not None: integration.name = name if description is not None: @@ -197,8 +202,8 @@ def update( integration.started_at = started_at if last_synced_at is not None: integration.last_synced_at = last_synced_at - if delta_url is not None: - integration.delta_url = delta_url + if delta_criteria is not None: + integration.delta_criteria = delta_criteria integration.is_synced = is_synced integration.finished_at = finished_at diff --git a/models.py b/models.py index e737cacc..77a5c011 100644 --- a/models.py +++ b/models.py @@ -2100,6 +2100,12 @@ class CognitionIntegration(Base): index=False, ) created_at = Column(DateTime, default=sql.func.now()) + updated_by = Column( + UUID(as_uuid=True), + ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET NULL"), + index=False, + ) + updated_at = Column(DateTime, onupdate=sql.func.now()) started_at = Column(DateTime) finished_at = Column(DateTime) name = Column(String) @@ -2120,7 +2126,7 @@ class CognitionIntegration(Base): error_message = Column(String) is_synced = Column(Boolean, nullable=True) last_synced_at = Column(DateTime) - delta_url = Column(String) + delta_criteria = Column(JSON) class CognitionIntegrationAccess(Base): From 5969ab95705880223ac5d1c5232300e3927f6367 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 17 Jun 2025 11:42:29 +0200 Subject: [PATCH 068/114] perf: add delta_criteria field --- enums.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/enums.py b/enums.py index 4da99fae..5754e25d 100644 --- a/enums.py +++ b/enums.py @@ -934,7 +934,7 @@ class IntegrationMetadata(Enum): # returns: {"source", "minio_file_name", "file_path", "page", "total_pages", "title"} """ - __DEFAULT__ = {"source", "minio_file_name", "running_id"} + __DEFAULT__ = {"source", "minio_file_name", "running_id", "delta_criteria"} GITHUB_FILE = {"path", "sha", "code_language"} GITHUB_ISSUE = {"url", "state", "number", "assignee", "milestone"} From a6f6ba27be48ad2e02b0884ffed223c4cdcaf1ca Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Wed, 18 Jun 2025 00:54:17 +0200 Subject: [PATCH 069/114] perf: check for status improvement --- cognition_objects/integration.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index c10e7198..92754722 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -12,6 +12,14 @@ ) +def get_by_ids(ids: List[str]) -> List[CognitionIntegration]: + return ( + session.query(CognitionIntegration) + .filter(CognitionIntegration.id.in_(ids)) + .all() + ) + + def get_by_id(id: str) -> CognitionIntegration: return ( session.query(CognitionIntegration) From 32e689f250fa90de72a8f884eb61f08d4d2e0750 Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Fri, 20 Jun 2025 09:31:43 +0200 Subject: [PATCH 070/114] add meta_data group --- cognition_objects/group.py | 13 +++++++++++++ models.py | 1 + 2 files changed, 14 insertions(+) diff --git a/cognition_objects/group.py b/cognition_objects/group.py index f2e14b7c..4ad7d8db 100644 --- a/cognition_objects/group.py +++ b/cognition_objects/group.py @@ -26,6 +26,19 @@ def get_all(organization_id: str) -> List[CognitionGroup]: ) +def get_all_by_integration_id_permission_grouped( + organization_id: str, integration_id: str +) -> List[CognitionGroup]: + integration_id_json = CognitionGroup.meta_data.op("->>")("integration_id") + + integration_groups = session.query(CognitionGroup).filter(CognitionGroup.organization_id == organization_id, integration_id_json == integration_id).all() + integration_groups_by_permission = {} + for group in integration_groups: + permission_id = group.meta_data.get("permission_id") + integration_groups_by_permission[permission_id] = group + return integration_groups_by_permission + + def create_group( organization_id: str, name: str, diff --git a/models.py b/models.py index 7853d219..e2ebdfe4 100644 --- a/models.py +++ b/models.py @@ -1948,6 +1948,7 @@ class CognitionGroup(Base): ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET NULL"), index=True, ) + meta_data = Column(JSON) class CognitionGroupMember(Base): From 4cd9462ad07118ee0a9fcf8c25b6473e999963eb Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Fri, 20 Jun 2025 14:24:53 +0200 Subject: [PATCH 071/114] format --- cognition_objects/group.py | 7 ++++++- models.py | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/cognition_objects/group.py b/cognition_objects/group.py index f2e14b7c..ea9ebfd9 100644 --- a/cognition_objects/group.py +++ b/cognition_objects/group.py @@ -12,7 +12,10 @@ def get(group_id: str) -> CognitionGroup: def get_with_organization_id(organization_id: str, group_id: str) -> CognitionGroup: return ( session.query(CognitionGroup) - .filter(CognitionGroup.organization_id == organization_id, CognitionGroup.id == group_id) + .filter( + CognitionGroup.organization_id == organization_id, + CognitionGroup.id == group_id, + ) .first() ) @@ -33,6 +36,7 @@ def create_group( created_by: str, created_at: Optional[datetime] = None, with_commit: bool = False, + meta_data: Optional[dict] = None, ) -> CognitionGroup: group = CognitionGroup( organization_id=organization_id, @@ -40,6 +44,7 @@ def create_group( description=description, created_by=created_by, created_at=created_at, + meta_data=meta_data, ) general.add(group, with_commit) return group diff --git a/models.py b/models.py index 7853d219..e2ebdfe4 100644 --- a/models.py +++ b/models.py @@ -1948,6 +1948,7 @@ class CognitionGroup(Base): ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET NULL"), index=True, ) + meta_data = Column(JSON) class CognitionGroupMember(Base): From 926fc4c411094ad03dc2451ed88e29f5e97787e6 Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Fri, 20 Jun 2025 14:27:10 +0200 Subject: [PATCH 072/114] model --- models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/models.py b/models.py index 7853d219..e2ebdfe4 100644 --- a/models.py +++ b/models.py @@ -1948,6 +1948,7 @@ class CognitionGroup(Base): ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET NULL"), index=True, ) + meta_data = Column(JSON) class CognitionGroupMember(Base): From 5bbdf87248c8725bdcba9c8b590bf501d53f630d Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Fri, 20 Jun 2025 15:08:37 +0200 Subject: [PATCH 073/114] sync action --- enums.py | 1 + 1 file changed, 1 insertion(+) diff --git a/enums.py b/enums.py index 3f8c5c7d..d79bf789 100644 --- a/enums.py +++ b/enums.py @@ -513,6 +513,7 @@ class TaskQueueAction(Enum): SEND_WEBSOCKET = "SEND_WEBSOCKET" FINISH_COGNITION_SETUP = "FINISH_COGNITION_SETUP" RUN_WEAK_SUPERVISION = "RUN_WEAK_SUPERVISION" + SYNC_SHAREPOINT_ACCESS = "SYNC_SHAREPOINT_ACCESS" class AgreementType(Enum): From 9f61c3c0b538ebc6fed86020645db8248818ce4f Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Fri, 20 Jun 2025 15:30:53 +0200 Subject: [PATCH 074/114] rename action --- enums.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/enums.py b/enums.py index d79bf789..ff37e41d 100644 --- a/enums.py +++ b/enums.py @@ -513,7 +513,7 @@ class TaskQueueAction(Enum): SEND_WEBSOCKET = "SEND_WEBSOCKET" FINISH_COGNITION_SETUP = "FINISH_COGNITION_SETUP" RUN_WEAK_SUPERVISION = "RUN_WEAK_SUPERVISION" - SYNC_SHAREPOINT_ACCESS = "SYNC_SHAREPOINT_ACCESS" + POSTPROCESS_INTEGRATION = "POSTPROCESS_INTEGRATION" class AgreementType(Enum): From b9ad6b6943b24d52092792c0cc5e46ad0f9f5a9d Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 20 Jun 2025 17:00:22 +0200 Subject: [PATCH 075/114] perf: dynamic 'by' record grouping --- integration_objects/manager.py | 34 ++++++++-------------------------- 1 file changed, 8 insertions(+), 26 deletions(-) diff --git a/integration_objects/manager.py b/integration_objects/manager.py index 2de96426..92a010e2 100644 --- a/integration_objects/manager.py +++ b/integration_objects/manager.py @@ -1,27 +1,7 @@ -from typing import Optional, Type +from typing import List, Optional, Dict, Union, Type from datetime import datetime from sqlalchemy import func -import pytz - -from submodules.model.session import session -from submodules.model.models import CognitionIntegration, IntegrationSharepoint - - -def get_modified_since(integration: CognitionIntegration) -> Optional[datetime]: - modified = ( - session.query(func.max(IntegrationSharepoint.modified)) - .filter(IntegrationSharepoint.integration_id == integration.id) - .first() - )[0] or datetime(1970, 1, 1) - return pytz.UTC.localize(modified) - - -from typing import List, Optional, Dict, Union - -from sqlalchemy import func -from datetime import datetime - from ..business_objects import general from ..cognition_objects import integration as integration_db_bo from ..session import session @@ -92,22 +72,24 @@ def get_all_by_project_id(IntegrationModel: Type, project_id: str) -> List[objec def get_existing_integration_records( - IntegrationModel, integration_id: str + IntegrationModel, integration_id: str, by: Optional[str] = "source" ) -> Dict[str, object]: return { - record.source: record + getattr(record, by, record.source): record for record in get_all_by_integration_id(IntegrationModel, integration_id) } -def get_running_ids(IntegrationModel: Type, integration_id: str) -> int: +def get_running_ids( + IntegrationModel: Type, integration_id: str, by: Optional[str] = "source" +) -> int: return dict( session.query( - IntegrationModel.source, + getattr(IntegrationModel, by, IntegrationModel.source), func.coalesce(func.max(IntegrationModel.running_id), 0), ) .filter(IntegrationModel.integration_id == integration_id) - .group_by(IntegrationModel.source) + .group_by(getattr(IntegrationModel, by, IntegrationModel.source)) .all() ) From 2e769dd801018670f06575d84f1cd79c353d001f Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 24 Jun 2025 02:46:12 +0200 Subject: [PATCH 076/114] style: arguments newlines in function definitions --- cognition_objects/integration.py | 4 +++ integration_objects/manager.py | 44 +++++++++++++++++++++++++------- 2 files changed, 39 insertions(+), 9 deletions(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index 92754722..bacc63c3 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -2,6 +2,7 @@ import datetime from fastapi import HTTPException from sqlalchemy import func +from sqlalchemy.orm.attributes import flag_modified from ..business_objects import general from ..session import session @@ -202,8 +203,10 @@ def update( integration.state = state.value if integration_config is not None: integration.config = integration_config + flag_modified(integration, "config") if llm_config is not None: integration.llm_config = llm_config + flag_modified(integration, "llm_config") if error_message is not None: integration.error_message = error_message if started_at is not None: @@ -212,6 +215,7 @@ def update( integration.last_synced_at = last_synced_at if delta_criteria is not None: integration.delta_criteria = delta_criteria + flag_modified(integration, "delta_criteria") integration.is_synced = is_synced integration.finished_at = finished_at diff --git a/integration_objects/manager.py b/integration_objects/manager.py index 92a010e2..afb52085 100644 --- a/integration_objects/manager.py +++ b/integration_objects/manager.py @@ -1,6 +1,7 @@ from typing import List, Optional, Dict, Union, Type from datetime import datetime from sqlalchemy import func +from sqlalchemy.orm.attributes import flag_modified from ..business_objects import general from ..cognition_objects import integration as integration_db_bo @@ -9,7 +10,9 @@ def get( - IntegrationModel: Type, integration_id: str, id: Optional[str] = None + IntegrationModel: Type, + integration_id: str, + id: Optional[str] = None, ) -> object: query = session.query(IntegrationModel).filter( IntegrationModel.integration_id == integration_id, @@ -20,12 +23,17 @@ def get( return query.order_by(IntegrationModel.created_at.desc()).all() -def get_by_id(IntegrationModel: Type, id: str) -> object: +def get_by_id( + IntegrationModel: Type, + id: str, +) -> object: return session.query(IntegrationModel).filter(IntegrationModel.id == id).first() def get_by_running_id( - IntegrationModel: Type, integration_id: str, running_id: int + IntegrationModel: Type, + integration_id: str, + running_id: int, ) -> object: return ( session.query(IntegrationModel) @@ -37,7 +45,11 @@ def get_by_running_id( ) -def get_by_source(IntegrationModel: Type, integration_id: str, source: str) -> object: +def get_by_source( + IntegrationModel: Type, + integration_id: str, + source: str, +) -> object: return ( session.query(IntegrationModel) .filter( @@ -49,7 +61,8 @@ def get_by_source(IntegrationModel: Type, integration_id: str, source: str) -> o def get_all_by_integration_id( - IntegrationModel: Type, integration_id: str + IntegrationModel: Type, + integration_id: str, ) -> List[object]: return ( session.query(IntegrationModel) @@ -59,7 +72,10 @@ def get_all_by_integration_id( ) -def get_all_by_project_id(IntegrationModel: Type, project_id: str) -> List[object]: +def get_all_by_project_id( + IntegrationModel: Type, + project_id: str, +) -> List[object]: integrations = integration_db_bo.get_all_by_project_id(project_id) return ( session.query(IntegrationModel) @@ -72,7 +88,9 @@ def get_all_by_project_id(IntegrationModel: Type, project_id: str) -> List[objec def get_existing_integration_records( - IntegrationModel, integration_id: str, by: Optional[str] = "source" + IntegrationModel: Type, + integration_id: str, + by: Optional[str] = "source", ) -> Dict[str, object]: return { getattr(record, by, record.source): record @@ -81,7 +99,9 @@ def get_existing_integration_records( def get_running_ids( - IntegrationModel: Type, integration_id: str, by: Optional[str] = "source" + IntegrationModel: Type, + integration_id: str, + by: Optional[str] = "source", ) -> int: return dict( session.query( @@ -145,6 +165,7 @@ def update( existing_value = getattr(integration_record, key, None) if value is not None and value != existing_value: setattr(integration_record, key, value) + flag_modified(integration_record, key) record_updated = True if record_updated: @@ -165,9 +186,14 @@ def delete_many( general.flush_or_commit(with_commit) -def clear_history(IntegrationModel: Type, id: str, with_commit: bool = False) -> None: +def clear_history( + IntegrationModel: Type, + id: str, + with_commit: bool = False, +) -> None: integration_record = get_by_id(IntegrationModel, id) integration_record.delta_criteria = None + flag_modified(integration_record, "delta_criteria") general.add(integration_record, with_commit) From 419811bc6c36ea171c109a5cb42c78441de1b3c1 Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Tue, 24 Jun 2025 13:02:11 +0200 Subject: [PATCH 077/114] new getters --- cognition_objects/group.py | 45 ++++++++++++++++++++++++++++--- cognition_objects/group_member.py | 44 +++++++++++++++++++++++------- 2 files changed, 76 insertions(+), 13 deletions(-) diff --git a/cognition_objects/group.py b/cognition_objects/group.py index c6f7a160..6d14a5c5 100644 --- a/cognition_objects/group.py +++ b/cognition_objects/group.py @@ -29,12 +29,35 @@ def get_all(organization_id: str) -> List[CognitionGroup]: ) +def get_all_by_integration_id( + organization_id: str, integration_id: str +) -> List[CognitionGroup]: + integration_id_json = CognitionGroup.meta_data.op("->>")("integration_id") + + return ( + session.query(CognitionGroup) + .filter( + CognitionGroup.organization_id == organization_id, + integration_id_json == integration_id, + ) + .order_by(CognitionGroup.name.asc()) + .all() + ) + + def get_all_by_integration_id_permission_grouped( organization_id: str, integration_id: str ) -> List[CognitionGroup]: integration_id_json = CognitionGroup.meta_data.op("->>")("integration_id") - integration_groups = session.query(CognitionGroup).filter(CognitionGroup.organization_id == organization_id, integration_id_json == integration_id).all() + integration_groups = ( + session.query(CognitionGroup) + .filter( + CognitionGroup.organization_id == organization_id, + integration_id_json == integration_id, + ) + .all() + ) integration_groups_by_permission = {} for group in integration_groups: permission_id = group.meta_data.get("permission_id") @@ -42,13 +65,24 @@ def get_all_by_integration_id_permission_grouped( return integration_groups_by_permission +def get_by_name(organization_id: str, name: str): + return ( + session.query(CognitionGroup) + .filter( + CognitionGroup.organization_id == organization_id, + CognitionGroup.name == name, + ) + .first() + ) + + def create_group( organization_id: str, name: str, description: str, created_by: str, created_at: Optional[datetime] = None, - with_commit: bool = False, + with_commit: bool = True, meta_data: Optional[dict] = None, ) -> CognitionGroup: group = CognitionGroup( @@ -67,7 +101,8 @@ def update_group( group_id: str, name: Optional[str] = None, description: Optional[str] = None, - with_commit: bool = False, + with_commit: bool = True, + meta_data: Optional[dict] = None, ) -> CognitionGroup: group = get(group_id) @@ -75,12 +110,14 @@ def update_group( group.name = name if description is not None: group.description = description + if meta_data is not None: + group.meta_data = meta_data general.flush_or_commit(with_commit) return group -def delete(organization_id: str, group_id: str, with_commit: bool = False) -> None: +def delete(organization_id: str, group_id: str, with_commit: bool = True) -> None: group = get_with_organization_id(organization_id, group_id) if group: general.delete(group, with_commit) diff --git a/cognition_objects/group_member.py b/cognition_objects/group_member.py index a5c25344..7122ec1e 100644 --- a/cognition_objects/group_member.py +++ b/cognition_objects/group_member.py @@ -9,7 +9,9 @@ def get(group_id: str, id: str): return ( session.query(CognitionGroupMember) - .filter(CognitionGroupMember.group_id == group_id, CognitionGroupMember.id == id) + .filter( + CognitionGroupMember.group_id == group_id, CognitionGroupMember.id == id + ) .first() ) @@ -17,28 +19,43 @@ def get(group_id: str, id: str): def get_by_group_and_user(group_id: str, user_id: str) -> CognitionGroupMember: return ( session.query(CognitionGroupMember) - .filter(CognitionGroupMember.group_id == group_id, CognitionGroupMember.user_id == user_id) + .filter( + CognitionGroupMember.group_id == group_id, + CognitionGroupMember.user_id == user_id, + ) .first() ) def get_by_user_id(user_id: str) -> list: - return session.query(CognitionGroupMember).filter(CognitionGroupMember.user_id == user_id).all() + return ( + session.query(CognitionGroupMember) + .filter(CognitionGroupMember.user_id == user_id) + .all() + ) def get_all_by_group(group_id: str) -> list: - return session.query(CognitionGroupMember).filter(CognitionGroupMember.group_id == group_id).all() + return ( + session.query(CognitionGroupMember) + .filter(CognitionGroupMember.group_id == group_id) + .all() + ) def get_all_by_group_count(group_id: str) -> int: - return session.query(CognitionGroupMember).filter(CognitionGroupMember.group_id == group_id).count() + return ( + session.query(CognitionGroupMember) + .filter(CognitionGroupMember.group_id == group_id) + .count() + ) def create( group_id: str, user_id: str, created_at: Optional[datetime] = None, - with_commit: bool = False, + with_commit: bool = True, ) -> CognitionGroupMember: already_exist = get_by_group_and_user(group_id=group_id, user_id=user_id) if already_exist: @@ -61,17 +78,26 @@ def create( def delete_by_group_and_user_id( - group_id: str, user_id: str, with_commit: bool = False + group_id: str, user_id: str, with_commit: bool = True ) -> None: group_member = get_by_group_and_user(group_id, user_id) if group_member: general.delete(group_member, with_commit) -def delete_by_user_id(user_id: str, with_commit: bool = False) -> None: +def delete_by_user_id(user_id: str, with_commit: bool = True) -> None: group_memberships = ( - session.query(CognitionGroupMember).filter(CognitionGroupMember.user_id == user_id).all() + session.query(CognitionGroupMember) + .filter(CognitionGroupMember.user_id == user_id) + .all() ) for membership in group_memberships: general.delete(membership, with_commit=False) general.flush_or_commit(with_commit) + + +def clear_by_group_id(group_id: str, with_commit: bool = True) -> None: + group_memberships = get_all_by_group(group_id) + for membership in group_memberships: + general.delete(membership, with_commit=False) + general.flush_or_commit(with_commit) From a20ea010f1711c56c3e46940f931dd7cb02a7c5c Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Tue, 24 Jun 2025 22:01:02 +0200 Subject: [PATCH 078/114] delete groups --- cognition_objects/integration.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index 92754722..1168fedf 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -5,7 +5,7 @@ from ..business_objects import general from ..session import session -from ..models import CognitionIntegration, Project +from ..models import CognitionIntegration, Project, CognitionGroup from ..enums import ( CognitionMarkdownFileState, CognitionIntegrationType, @@ -237,7 +237,10 @@ def execution_finished(id: str) -> bool: def delete_many( - ids: List[str], delete_refinery_projects: bool = False, with_commit: bool = True + ids: List[str], + delete_refinery_projects: bool = False, + delete_cognition_groups: bool = True, + with_commit: bool = True, ) -> None: integrations = session.query(CognitionIntegration).filter( CognitionIntegration.id.in_(ids) @@ -246,5 +249,9 @@ def delete_many( session.query(Project).filter( Project.id.in_(filter(None, [i.project_id for i in integrations])) ).delete(synchronize_session=False) + if delete_cognition_groups: + session.query(CognitionGroup).filter( + CognitionGroup.meta_data.op("->>")("integration_id").in_(ids) + ).delete(synchronize_session=False) integrations.delete(synchronize_session=False) general.flush_or_commit(with_commit) From 09b63b509fed6367a4534a3442298130063453cc Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 24 Jun 2025 22:17:43 +0200 Subject: [PATCH 079/114] fix: rm get project by name and org id --- business_objects/project.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/business_objects/project.py b/business_objects/project.py index 5ba723ac..d3d192b0 100644 --- a/business_objects/project.py +++ b/business_objects/project.py @@ -609,14 +609,3 @@ def get_project_by_project_id_sql(project_id: str) -> Dict[str, Any]: return value[0] else: return None - - -def get_by_name_and_org_id(name: str, organization_id: str) -> Optional[Project]: - return ( - session.query(Project) - .filter( - Project.name == name, - Project.organization_id == organization_id, - ) - .first() - ) From 082246762e718d3ac5f7af9b688c4ce0f51a6cf6 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Wed, 25 Jun 2025 09:03:51 +0200 Subject: [PATCH 080/114] perf: pr review comments --- cognition_objects/integration_access.py | 17 ++++++++-------- enums.py | 22 +++++---------------- integration_objects/manager.py | 10 +++++----- models.py | 26 +++++++++++-------------- 4 files changed, 29 insertions(+), 46 deletions(-) diff --git a/cognition_objects/integration_access.py b/cognition_objects/integration_access.py index cd5755ae..fadff1d5 100644 --- a/cognition_objects/integration_access.py +++ b/cognition_objects/integration_access.py @@ -24,17 +24,16 @@ def get_by_org_id(org_id: str) -> List[CognitionIntegrationAccess]: def get( - org_id: str, integration_type: CognitionIntegrationType + org_id: str, integration_type: Optional[CognitionIntegrationType] = None ) -> List[CognitionIntegrationAccess]: - return ( - session.query(CognitionIntegrationAccess) - .filter( - CognitionIntegrationAccess.organization_id == org_id, - CognitionIntegrationAccess.integration_type == integration_type, - ) - .order_by(CognitionIntegrationAccess.created_at.asc()) - .all() + query = session.query(CognitionIntegrationAccess).filter( + CognitionIntegrationAccess.organization_id == org_id, ) + if integration_type: + query = query.filter( + CognitionIntegrationAccess.integration_type == integration_type.value + ) + return query.order_by(CognitionIntegrationAccess.created_at.asc()).all() def get_all() -> List[CognitionIntegrationAccess]: diff --git a/enums.py b/enums.py index 5754e25d..cee39bae 100644 --- a/enums.py +++ b/enums.py @@ -887,11 +887,6 @@ class EvaluationRunState(Enum): class CognitionIntegrationType(Enum): - # CSV = "CSV" - # JSON = "JSON" - # DOCX = "DOCX" - # XLSX = "XLSX" - # WEBPAGE = "WEBPAGE" SHAREPOINT = "SHAREPOINT" GITHUB_FILE = "GITHUB_FILE" GITHUB_ISSUE = "GITHUB_ISSUE" @@ -912,20 +907,13 @@ class IntegrationMetadata(Enum): """ Enum for controlling and documenting the dynamic metadata fields associated with different integration types. - The `IntegrationMetadata` enum defines which metadata keys are expected and allowed for each integration type - (e.g., GITHUB_FILE, GITHUB_ISSUE, PDF). Each member contains a set of keys specific to that integration, while - the `__DEFAULT__` member defines a set of common metadata fields (`source`, `delta_criteria`, `minio_file_name`) - that are always included. + The `IntegrationMetadata` enum defines which metadata keys are expected and allowed for each integration table + (e.g., `integration.SHAREPOINT`, `integration.GITHUB_FILE`). Each member contains a set of keys specific to that integration, while + the `__DEFAULT__` (`source`, `delta_criteria`, `minio_file_name`) are always included. - During extraction (see the `extract` functions in the integration handlers), metadata is dynamically attached to - each document according to the rules defined here. This ensures that only the relevant and allowed metadata fields - are published to the database for each integration type. + During extraction, metadata is dynamically attached to each document according to the rules defined here. - The enum provides utility methods: - - `from_string(value: str)`: Returns the union of default and integration-specific metadata keys for a given type. - - `from_table_name(table_name: str)`: Looks up metadata keys based on the integration's table name. - - This enum is used by the integration object logic (see `submodules/model/integration_objects/__init__.py`) to + This enum is used by the integration object logic (see `src.util.integration #delta_load`) to validate and filter metadata before persisting it, ensuring consistency and preventing unwanted fields from being stored in the database. diff --git a/integration_objects/manager.py b/integration_objects/manager.py index afb52085..ad55002b 100644 --- a/integration_objects/manager.py +++ b/integration_objects/manager.py @@ -90,7 +90,7 @@ def get_all_by_project_id( def get_existing_integration_records( IntegrationModel: Type, integration_id: str, - by: Optional[str] = "source", + by: str = "source", ) -> Dict[str, object]: return { getattr(record, by, record.source): record @@ -101,8 +101,8 @@ def get_existing_integration_records( def get_running_ids( IntegrationModel: Type, integration_id: str, - by: Optional[str] = "source", -) -> int: + by: str = "source", +) -> Dict[str, int]: return dict( session.query( getattr(IntegrationModel, by, IntegrationModel.source), @@ -169,7 +169,7 @@ def update( record_updated = True if record_updated: - general.add(integration_record, with_commit=with_commit) + general.flush_or_commit(with_commit) return integration_record @@ -194,7 +194,7 @@ def clear_history( integration_record = get_by_id(IntegrationModel, id) integration_record.delta_criteria = None flag_modified(integration_record, "delta_criteria") - general.add(integration_record, with_commit) + general.flush_or_commit(with_commit) def _get_supported_metadata( diff --git a/models.py b/models.py index 77a5c011..169b9db0 100644 --- a/models.py +++ b/models.py @@ -2097,13 +2097,13 @@ class CognitionIntegration(Base): created_by = Column( UUID(as_uuid=True), ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET NULL"), - index=False, + index=True, ) created_at = Column(DateTime, default=sql.func.now()) updated_by = Column( UUID(as_uuid=True), ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET NULL"), - index=False, + index=True, ) updated_at = Column(DateTime, onupdate=sql.func.now()) started_at = Column(DateTime) @@ -2136,7 +2136,7 @@ class CognitionIntegrationAccess(Base): created_by = Column( UUID(as_uuid=True), ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET NULL"), - index=False, + index=True, ) created_at = Column(DateTime, default=sql.func.now()) organization_id = Column( @@ -2164,13 +2164,12 @@ class IntegrationGithubFile(Base): created_by = Column( UUID(as_uuid=True), ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET NULL"), - index=False, + index=True, ) updated_by = Column( UUID(as_uuid=True), ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET NULL"), - index=False, - nullable=True, + index=True, ) created_at = Column(DateTime, default=sql.func.now()) updated_at = Column(DateTime, default=None, onupdate=sql.func.now()) @@ -2205,13 +2204,12 @@ class IntegrationGithubIssue(Base): created_by = Column( UUID(as_uuid=True), ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET NULL"), - index=False, + index=True, ) updated_by = Column( UUID(as_uuid=True), ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET NULL"), - index=False, - nullable=True, + index=True, ) created_at = Column(DateTime, default=sql.func.now()) updated_at = Column(DateTime, default=None, onupdate=sql.func.now()) @@ -2246,13 +2244,12 @@ class IntegrationPdf(Base): created_by = Column( UUID(as_uuid=True), ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET NULL"), - index=False, + index=True, ) updated_by = Column( UUID(as_uuid=True), ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET NULL"), - index=False, - nullable=True, + index=True, ) created_at = Column(DateTime, default=sql.func.now()) updated_at = Column(DateTime, default=None, onupdate=sql.func.now()) @@ -2286,13 +2283,12 @@ class IntegrationSharepoint(Base): created_by = Column( UUID(as_uuid=True), ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET NULL"), - index=False, + index=True, ) updated_by = Column( UUID(as_uuid=True), ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET NULL"), - index=False, - nullable=True, + index=True, ) created_at = Column(DateTime, default=sql.func.now()) updated_at = Column(DateTime, default=None, onupdate=sql.func.now()) From 39b710f5c1ed2c2593dab095bc6b790a845a673b Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Wed, 25 Jun 2025 09:04:38 +0200 Subject: [PATCH 081/114] perf: pr review comments --- cognition_objects/integration.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index bacc63c3..92f081a6 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -12,6 +12,11 @@ CognitionIntegrationType, ) +FINISHED_STATES = [ + CognitionMarkdownFileState.FINISHED.value, + CognitionMarkdownFileState.FAILED.value, +] + def get_by_ids(ids: List[str]) -> List[CognitionIntegration]: return ( @@ -31,8 +36,8 @@ def get_by_id(id: str) -> CognitionIntegration: def get_all( integration_type: Optional[str] = None, - exclude_failed: Optional[bool] = False, - only_synced: Optional[bool] = False, + exclude_failed: bool = False, + only_synced: bool = False, ) -> List[CognitionIntegration]: query = session.query(CognitionIntegration) if integration_type: @@ -49,7 +54,7 @@ def get_all( def get_all_in_org( org_id: str, integration_type: Optional[str] = None, - only_synced: Optional[bool] = False, + only_synced: bool = False, ) -> List[CognitionIntegration]: query = session.query(CognitionIntegration).filter( CognitionIntegration.organization_id == org_id @@ -67,9 +72,6 @@ def get_all_in_org_paginated( page: int = 1, page_size: int = 10, ) -> List[CognitionIntegration]: - schema_name = CognitionIntegration.__table__.schema or "public" - table_name = f"{schema_name}.{CognitionIntegration.__tablename__}" - first_page = (page - 1) * page_size last_page = page * page_size @@ -78,7 +80,7 @@ def get_all_in_org_paginated( SELECT ROW_NUMBER () OVER(PARTITION BY intg.id ORDER BY intg.created_at ASC) rn, intg.id - FROM {table_name} intg + FROM cognition.{CognitionIntegration.__tablename__} intg WHERE intg.organization_id = '{org_id}' ) pages WHERE rn BETWEEN {first_page} AND {last_page} @@ -229,12 +231,7 @@ def execution_finished(id: str) -> bool: session.query(CognitionIntegration) .filter( CognitionIntegration.id == id, - CognitionIntegration.state.in_( - [ - CognitionMarkdownFileState.FINISHED.value, - CognitionMarkdownFileState.FAILED.value, - ] - ), + CognitionIntegration.state.in_(FINISHED_STATES), ) .first() ) From 5ffe2bf5ade5ae8922d21f7f4ae0e61205eb98f0 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Wed, 25 Jun 2025 09:54:34 +0200 Subject: [PATCH 082/114] fix: paginated query --- cognition_objects/integration.py | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index 92f081a6..101f042e 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -72,29 +72,19 @@ def get_all_in_org_paginated( page: int = 1, page_size: int = 10, ) -> List[CognitionIntegration]: - first_page = (page - 1) * page_size - last_page = page * page_size - - sql = f""" - SELECT id FROM ( - SELECT - ROW_NUMBER () OVER(PARTITION BY intg.id ORDER BY intg.created_at ASC) rn, - intg.id - FROM cognition.{CognitionIntegration.__tablename__} intg - WHERE intg.organization_id = '{org_id}' - ) pages - WHERE rn BETWEEN {first_page} AND {last_page} - """ - integration_ids = general.execute_all(sql) - if not integration_ids: - return [] - query = session.query(CognitionIntegration).filter( - CognitionIntegration.id.in_([row[0] for row in integration_ids]) + CognitionIntegration.organization_id == org_id, ) + if integration_type: query = query.filter(CognitionIntegration.type == integration_type) - return query.order_by(CognitionIntegration.created_at.desc()).all() + + return ( + query.order_by(CognitionIntegration.created_at.desc()) + .limit(page_size) + .offset((page - 1) * page_size) + .all() + ) def get_all_by_project_id(project_id: str) -> List[CognitionIntegration]: From be012217a40c994d63f469c5af50d6fb98e21a55 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Wed, 25 Jun 2025 10:35:29 +0200 Subject: [PATCH 083/114] perf: update integrations model --- models.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/models.py b/models.py index 169b9db0..a661453b 100644 --- a/models.py +++ b/models.py @@ -2172,7 +2172,7 @@ class IntegrationGithubFile(Base): index=True, ) created_at = Column(DateTime, default=sql.func.now()) - updated_at = Column(DateTime, default=None, onupdate=sql.func.now()) + updated_at = Column(DateTime, onupdate=sql.func.now()) integration_id = Column( UUID(as_uuid=True), ForeignKey(f"cognition.{Tablenames.INTEGRATION.value}.id", ondelete="CASCADE"), @@ -2212,7 +2212,7 @@ class IntegrationGithubIssue(Base): index=True, ) created_at = Column(DateTime, default=sql.func.now()) - updated_at = Column(DateTime, default=None, onupdate=sql.func.now()) + updated_at = Column(DateTime, onupdate=sql.func.now()) integration_id = Column( UUID(as_uuid=True), ForeignKey(f"cognition.{Tablenames.INTEGRATION.value}.id", ondelete="CASCADE"), @@ -2252,7 +2252,7 @@ class IntegrationPdf(Base): index=True, ) created_at = Column(DateTime, default=sql.func.now()) - updated_at = Column(DateTime, default=None, onupdate=sql.func.now()) + updated_at = Column(DateTime, onupdate=sql.func.now()) integration_id = Column( UUID(as_uuid=True), ForeignKey(f"cognition.{Tablenames.INTEGRATION.value}.id", ondelete="CASCADE"), @@ -2291,7 +2291,7 @@ class IntegrationSharepoint(Base): index=True, ) created_at = Column(DateTime, default=sql.func.now()) - updated_at = Column(DateTime, default=None, onupdate=sql.func.now()) + updated_at = Column(DateTime, onupdate=sql.func.now()) integration_id = Column( UUID(as_uuid=True), ForeignKey(f"cognition.{Tablenames.INTEGRATION.value}.id", ondelete="CASCADE"), From c389dc6a99eec89b3a903c6f60f3631251a0039b Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Wed, 25 Jun 2025 15:34:14 +0200 Subject: [PATCH 084/114] perf: update to_snake_case regex compilation --- util.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/util.py b/util.py index 71bc856f..b3035bc8 100644 --- a/util.py +++ b/util.py @@ -15,6 +15,10 @@ from .business_objects import general CAMEL_CASE_PATTERN = compile(r"^([a-z]+[A-Z]?)*$") +SNAKE_CASE_PATTERNS = [ + compile(r"(.)([A-Z][a-z]+)"), + compile(r"([a-z0-9])([A-Z])"), +] def collect_engine_variables() -> Tuple[int, int, bool, bool]: @@ -209,10 +213,12 @@ def to_camel_case(name: str): def to_snake_case(name: str): + # ref: https://stackoverflow.com/questions/1175208/elegant-python-function-to-convert-camelcase-to-snake-case if not is_camel_case(name): return name - name = sub("(.)([A-Z][a-z]+)", r"\1_\2", name) - return sub("([a-z0-9])([A-Z])", r"\1_\2", name).lower() + for phase in SNAKE_CASE_PATTERNS: + name = phase.sub(r"\1_\2", name) + return name.lower() def is_list_like(value: Any) -> bool: From 0be3fc5ebb85de649a9e5528bbca041217309d38 Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Wed, 25 Jun 2025 16:15:38 +0200 Subject: [PATCH 085/114] access management --- business_objects/project.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/business_objects/project.py b/business_objects/project.py index bccf1030..96f42941 100644 --- a/business_objects/project.py +++ b/business_objects/project.py @@ -8,11 +8,7 @@ from .. import enums from ..session import session -from ..models import ( - Project, - Record, - Attribute -) +from ..models import Project, Record, Attribute from ..util import prevent_sql_injection QUEUE_PROJECT_NAME = "@@HIDDEN_QUEUE_PROJECT@@" @@ -162,7 +158,11 @@ def get_all_with_access_management(organization_id: str) -> List[Project]: .join(Attribute, Project.id == Attribute.project_id) .filter( Project.organization_id == organization_id, + Attribute.name.in_(["__ACCESS_GROUPS", "__ACCESS_USERS"]), # Attribute.name.in_(["__ACCESS_GROUPS", "__ACCESS_USERS"]), + Attribute.user_created == False, + Attribute.data_type == enums.DataTypes.PERMISSION.value, + Attribute.state == enums.AttributeState.AUTOMATICALLY_CREATED.value, ) .distinct() .all() @@ -176,6 +176,9 @@ def check_access_management_active(project_id: str) -> bool: .filter( Project.id == project_id, Attribute.name.in_(["__ACCESS_GROUPS", "__ACCESS_USERS"]), + Attribute.user_created == False, + Attribute.data_type == enums.DataTypes.PERMISSION.value, + Attribute.state == enums.AttributeState.AUTOMATICALLY_CREATED.value, ) .count() > 0 From f1b40ebbe7000ff8cff502c956cc207d6e119b57 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Wed, 25 Jun 2025 16:32:52 +0200 Subject: [PATCH 086/114] perf: db model update --- cognition_objects/integration.py | 15 +++++---------- models.py | 3 +-- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index 101f042e..3a40dafa 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -227,15 +227,10 @@ def execution_finished(id: str) -> bool: ) -def delete_many( - ids: List[str], delete_refinery_projects: bool = False, with_commit: bool = True -) -> None: - integrations = session.query(CognitionIntegration).filter( - CognitionIntegration.id.in_(ids) +def delete_many(ids: List[str], with_commit: bool = True) -> None: + ( + session.query(CognitionIntegration) + .filter(CognitionIntegration.id.in_(ids)) + .delete(synchronize_session=False) ) - if delete_refinery_projects: - session.query(Project).filter( - Project.id.in_(filter(None, [i.project_id for i in integrations])) - ).delete(synchronize_session=False) - integrations.delete(synchronize_session=False) general.flush_or_commit(with_commit) diff --git a/models.py b/models.py index a661453b..3a5cad97 100644 --- a/models.py +++ b/models.py @@ -2090,9 +2090,8 @@ class CognitionIntegration(Base): ) project_id = Column( UUID(as_uuid=True), - ForeignKey(f"{Tablenames.PROJECT.value}.id", ondelete="CASCADE"), + ForeignKey(f"{Tablenames.PROJECT.value}.id", ondelete="SET NULL"), index=True, - nullable=True, ) created_by = Column( UUID(as_uuid=True), From 744ee3691b7d62b639530c8ae7fce41cdeda63f0 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Wed, 25 Jun 2025 17:04:53 +0200 Subject: [PATCH 087/114] perf: pr comments --- cognition_objects/integration.py | 15 +++++++++++---- cognition_objects/integration_access.py | 12 ++++++++---- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index 3a40dafa..46da975e 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -6,7 +6,7 @@ from ..business_objects import general from ..session import session -from ..models import CognitionIntegration, Project +from ..models import CognitionIntegration from ..enums import ( CognitionMarkdownFileState, CognitionIntegrationType, @@ -208,9 +208,16 @@ def update( if delta_criteria is not None: integration.delta_criteria = delta_criteria flag_modified(integration, "delta_criteria") - - integration.is_synced = is_synced - integration.finished_at = finished_at + if is_synced is not None: + if is_synced == "NULL": + integration.is_synced = None + else: + integration.is_synced = is_synced + if finished_at is not None: + if finished_at == "NULL": + integration.finished_at = None + else: + integration.finished_at = finished_at general.add(integration, with_commit) return integration diff --git a/cognition_objects/integration_access.py b/cognition_objects/integration_access.py index fadff1d5..548e3e71 100644 --- a/cognition_objects/integration_access.py +++ b/cognition_objects/integration_access.py @@ -47,7 +47,7 @@ def get_all() -> List[CognitionIntegrationAccess]: def create( org_id: str, user_id: str, - integration_types: List[str], + integration_types: List[CognitionIntegrationType], with_commit: bool = True, created_at: Optional[datetime] = None, ) -> CognitionIntegrationAccess: @@ -55,7 +55,9 @@ def create( organization_id=org_id, created_by=user_id, created_at=created_at, - integration_types=integration_types, + integration_types=[ + integration_type.value for integration_type in integration_types + ], ) general.add(integration_access, with_commit) @@ -65,14 +67,16 @@ def create( def update( id: str, org_id: Optional[str] = None, - integration_types: Optional[List[str]] = None, + integration_types: Optional[List[CognitionIntegrationType]] = None, with_commit: bool = True, ) -> CognitionIntegrationAccess: integration_access = get_by_id(id) if org_id: integration_access.organization_id = org_id if integration_types: - integration_access.integration_types = integration_types + integration_access.integration_types = [ + integration_type.value for integration_type in integration_types + ] general.add(integration_access, with_commit) return integration_access From c1ae06149ca52ca08e7992c2467ecdc038be6252 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Wed, 25 Jun 2025 17:06:08 +0200 Subject: [PATCH 088/114] perf: remove unnecessary checks --- cognition_objects/integration.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index 46da975e..b869277b 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -1,6 +1,5 @@ from typing import List, Optional, Dict import datetime -from fastapi import HTTPException from sqlalchemy import func from sqlalchemy.orm.attributes import flag_modified @@ -139,8 +138,6 @@ def create( project_id: Optional[str] = None, with_commit: bool = True, ) -> CognitionIntegration: - if state not in CognitionMarkdownFileState.all(): - raise HTTPException(status_code=400, detail=f"Invalid state: {state}") integration: CognitionIntegration = CognitionIntegration( id=id, organization_id=org_id, From 88caad0524284f289fb8e41e39adaabb98f3f3a7 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Wed, 25 Jun 2025 17:07:34 +0200 Subject: [PATCH 089/114] fix: nullable error message --- cognition_objects/integration.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index b869277b..6d5abd2d 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -196,8 +196,6 @@ def update( if llm_config is not None: integration.llm_config = llm_config flag_modified(integration, "llm_config") - if error_message is not None: - integration.error_message = error_message if started_at is not None: integration.started_at = started_at if last_synced_at is not None: @@ -205,6 +203,11 @@ def update( if delta_criteria is not None: integration.delta_criteria = delta_criteria flag_modified(integration, "delta_criteria") + if error_message is not None: + if error_message == "NULL": + integration.error_message = None + else: + integration.error_message = error_message if is_synced is not None: if is_synced == "NULL": integration.is_synced = None From 4fa69bc3e8911a61bcd82a5a11177e4d40a9ccdc Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Wed, 25 Jun 2025 17:48:20 +0200 Subject: [PATCH 090/114] perf: pr comments --- cognition_objects/integration.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index 6d5abd2d..969900e3 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Dict +from typing import List, Optional, Dict, Union import datetime from sqlalchemy import func from sqlalchemy.orm.attributes import flag_modified @@ -81,7 +81,7 @@ def get_all_in_org_paginated( return ( query.order_by(CognitionIntegration.created_at.desc()) .limit(page_size) - .offset((page - 1) * page_size) + .offset(max(0, (page - 1) * page_size)) .all() ) @@ -172,9 +172,9 @@ def update( llm_config: Optional[Dict] = None, error_message: Optional[str] = None, started_at: Optional[datetime.datetime] = None, - finished_at: Optional[datetime.datetime] = None, + finished_at: Optional[Union[str, datetime.datetime]] = None, last_synced_at: Optional[datetime.datetime] = None, - is_synced: Optional[bool] = None, + is_synced: Optional[Union[str, bool]] = None, delta_criteria: Optional[Dict[str, str]] = None, with_commit: bool = True, ) -> CognitionIntegration: From 74008a9201b402cc877963f477b2b1b8b2d88c7b Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Thu, 26 Jun 2025 11:00:47 +0200 Subject: [PATCH 091/114] delete --- cognition_objects/integration.py | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index 36b3b6f2..15202741 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -5,7 +5,7 @@ from ..business_objects import general from ..session import session -from ..models import CognitionIntegration, Project, CognitionGroup +from ..models import CognitionIntegration, CognitionGroup from ..enums import ( CognitionMarkdownFileState, CognitionIntegrationType, @@ -234,30 +234,16 @@ def execution_finished(id: str) -> bool: ) -def delete_many2( - ids: List[str], - delete_refinery_projects: bool = False, - delete_cognition_groups: bool = True, - with_commit: bool = True, +def delete_many( + ids: List[str], delete_cognition_groups: bool = True, with_commit: bool = True ) -> None: - integrations = session.query(CognitionIntegration).filter( - CognitionIntegration.id.in_(ids) - ) - if delete_refinery_projects: - session.query(Project).filter( - Project.id.in_(filter(None, [i.project_id for i in integrations])) - ).delete(synchronize_session=False) - if delete_cognition_groups: - session.query(CognitionGroup).filter( - CognitionGroup.meta_data.op("->>")("integration_id").in_(ids) - ).delete(synchronize_session=False) - integrations.delete(synchronize_session=False) - - -def delete_many(ids: List[str], with_commit: bool = True) -> None: ( session.query(CognitionIntegration) .filter(CognitionIntegration.id.in_(ids)) .delete(synchronize_session=False) ) + if delete_cognition_groups: + session.query(CognitionGroup).filter( + CognitionGroup.meta_data.op("->>")("integration_id").in_(ids) + ).delete(synchronize_session=False) general.flush_or_commit(with_commit) From 3020f36e5f4056d54ac4ee602d78f8457af582ed Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 26 Jun 2025 14:00:08 +0200 Subject: [PATCH 092/114] perf: move IntegrationMetadata enum to integration_objects.helper --- enums.py | 56 ---------------------------------- integration_objects/helper.py | 52 +++++++++++++++++++++++++++++++ integration_objects/manager.py | 11 +++++-- models.py | 9 ++++-- 4 files changed, 66 insertions(+), 62 deletions(-) create mode 100644 integration_objects/helper.py diff --git a/enums.py b/enums.py index 144cda42..49469f93 100644 --- a/enums.py +++ b/enums.py @@ -915,59 +915,3 @@ def from_string(value: str): raise KeyError( f"Could not parse CognitionIntegrationType from string '{changed_value}'" ) - - -class IntegrationMetadata(Enum): - """ - Enum for controlling and documenting the dynamic metadata fields associated with different integration types. - - The `IntegrationMetadata` enum defines which metadata keys are expected and allowed for each integration table - (e.g., `integration.SHAREPOINT`, `integration.GITHUB_FILE`). Each member contains a set of keys specific to that integration, while - the `__DEFAULT__` (`source`, `delta_criteria`, `minio_file_name`) are always included. - - During extraction, metadata is dynamically attached to each document according to the rules defined here. - - This enum is used by the integration object logic (see `src.util.integration #delta_load`) to - validate and filter metadata before persisting it, ensuring consistency and preventing unwanted fields from being - stored in the database. - - Example: - IntegrationMetadata.from_string("PDF") - # returns: {"source", "minio_file_name", "file_path", "page", "total_pages", "title"} - """ - - __DEFAULT__ = {"source", "minio_file_name", "running_id", "delta_criteria"} - - GITHUB_FILE = {"path", "sha", "code_language"} - GITHUB_ISSUE = {"url", "state", "number", "assignee", "milestone"} - PDF = {"file_path", "page", "total_pages", "title"} - SHAREPOINT = { - "extension", - "object_id", - "parent_path", - "name", - "web_url", - f"{Tablenames.INTEGRATION_SHAREPOINT.value}_created_by", - "modified_by", - "created", - "modified", - "description", - "size", - "mime_type", - "hashes", - "permissions", - } - - @staticmethod - def from_string(value: str): - try: - metadata_keys = IntegrationMetadata[value].value - except KeyError: - raise ValueError( - f"Could not parse IntegrationMetadata from string '{value}'" - ) - return IntegrationMetadata.__DEFAULT__.union(metadata_keys) - - @staticmethod - def from_table_name(table_name: str): - return IntegrationMetadata.from_string(table_name.upper()) diff --git a/integration_objects/helper.py b/integration_objects/helper.py new file mode 100644 index 00000000..07c05772 --- /dev/null +++ b/integration_objects/helper.py @@ -0,0 +1,52 @@ +from ..enums import Tablenames + + +DEFAULT_METADATA = {"source", "minio_file_name", "running_id"} +TABLE_METADATA = { + Tablenames.INTEGRATION_GITHUB_FILE: {"path", "sha", "code_language"}, + Tablenames.INTEGRATION_GITHUB_ISSUE: { + "url", + "state", + "number", + "assignee", + "milestone", + }, + Tablenames.INTEGRATION_PDF: {"file_path", "page", "total_pages", "title"}, + Tablenames.INTEGRATION_SHAREPOINT: { + "extension", + "object_id", + "parent_path", + "name", + "web_url", + f"{Tablenames.INTEGRATION_SHAREPOINT.value}_created_by", + "modified_by", + "created", + "modified", + "description", + "size", + "mime_type", + "hashes", + "permissions", + }, +} + + +def get_supported_metadata_keys(table_name: str): + """ + Function for controlling and documenting the dynamic metadata fields associated with different integration types. + + The `TABLE_METADATA` dictionary defines which metadata keys are expected and allowed for each integration table + (e.g., `integration.sharepoint`, `integration.github_file`). Each value contains a set of keys specific to that integration, while + the `DEFAULT_METADATA` (`source`, `running_id`, `minio_file_name`) are always included. + + During extraction, metadata is dynamically attached to each document according to the rules defined here. + + This function is used by the integration object logic (see `src.util.integration #delta_load`) to + validate and filter metadata before persisting it, ensuring consistency and preventing unwanted fields from being + stored in the database. + + Example: + get_supported_metadata("pdf") + # returns: {"source", "minio_file_name", "running_id", "file_path", "page", "total_pages", "title"} + """ + return DEFAULT_METADATA.union(TABLE_METADATA.get(table_name, set())) diff --git a/integration_objects/manager.py b/integration_objects/manager.py index ad55002b..ca9683a6 100644 --- a/integration_objects/manager.py +++ b/integration_objects/manager.py @@ -6,7 +6,7 @@ from ..business_objects import general from ..cognition_objects import integration as integration_db_bo from ..session import session -from ..enums import IntegrationMetadata +from .helper import get_supported_metadata_keys def get( @@ -120,6 +120,7 @@ def create( integration_id: str, running_id: int, created_at: Optional[datetime] = None, + error_message: Optional[str] = None, id: Optional[str] = None, with_commit: bool = True, **metadata, @@ -129,6 +130,7 @@ def create( integration_id=integration_id, running_id=running_id, created_at=created_at, + error_message=error_message, id=id, **metadata, ) @@ -145,6 +147,7 @@ def update( updated_by: str, running_id: Optional[int] = None, updated_at: Optional[datetime] = None, + error_message: Optional[str] = None, with_commit: bool = True, **metadata, ) -> object: @@ -155,6 +158,8 @@ def update( integration_record.running_id = running_id if updated_at is not None: integration_record.updated_at = updated_at + if error_message is not None: + integration_record.error_message = error_message record_updated = False for key, value in metadata.items(): @@ -197,10 +202,10 @@ def clear_history( general.flush_or_commit(with_commit) -def _get_supported_metadata( +def get_supported_metadata( table_name: str, metadata: Dict[str, Union[str, int, float, bool]] ) -> None: - supported_keys = IntegrationMetadata.from_table_name(table_name) + supported_keys = get_supported_metadata_keys(table_name) supported_metadata = { key: metadata[key] for key in supported_keys.intersection(metadata.keys()) } diff --git a/models.py b/models.py index 3a5cad97..3a747b0d 100644 --- a/models.py +++ b/models.py @@ -2179,13 +2179,13 @@ class IntegrationGithubFile(Base): ) running_id = Column(Integer, index=True) source = Column(String, index=True) + minio_file_name = Column(String) + error_message = Column(String) + path = Column(String) sha = Column(String) code_language = Column(String) - delta_criteria = Column(JSON) - minio_file_name = Column(String) - class IntegrationGithubIssue(Base): __tablename__ = Tablenames.INTEGRATION_GITHUB_ISSUE.value @@ -2220,6 +2220,7 @@ class IntegrationGithubIssue(Base): running_id = Column(Integer, index=True) source = Column(String, index=True) minio_file_name = Column(String) + error_message = Column(String) url = Column(String) state = Column(String) @@ -2260,6 +2261,7 @@ class IntegrationPdf(Base): running_id = Column(Integer, index=True) source = Column(String, index=True) minio_file_name = Column(String) + error_message = Column(String) file_path = Column(String) page = Column(Integer) @@ -2299,6 +2301,7 @@ class IntegrationSharepoint(Base): running_id = Column(Integer, index=True) source = Column(String, index=True) minio_file_name = Column(String) + error_message = Column(String) extension = Column(String) object_id = Column(String) From 2fe5e2d9e642d71607119ff9719d54d20d573e27 Mon Sep 17 00:00:00 2001 From: lumburovskalina <104008550+lumburovskalina@users.noreply.github.com> Date: Thu, 26 Jun 2025 14:47:28 +0200 Subject: [PATCH 093/114] Oidc field in the users table (#170) Co-authored-by: andhreljaKern From 9a7026f95dbfa46b32f26e7d33a49115c5c8fe7f Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 26 Jun 2025 15:01:19 +0200 Subject: [PATCH 094/114] fix: metadata helper function --- integration_objects/helper.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/integration_objects/helper.py b/integration_objects/helper.py index 07c05772..e513125f 100644 --- a/integration_objects/helper.py +++ b/integration_objects/helper.py @@ -3,16 +3,16 @@ DEFAULT_METADATA = {"source", "minio_file_name", "running_id"} TABLE_METADATA = { - Tablenames.INTEGRATION_GITHUB_FILE: {"path", "sha", "code_language"}, - Tablenames.INTEGRATION_GITHUB_ISSUE: { + Tablenames.INTEGRATION_PDF.value: {"file_path", "page", "total_pages", "title"}, + Tablenames.INTEGRATION_GITHUB_FILE.value: {"path", "sha", "code_language"}, + Tablenames.INTEGRATION_GITHUB_ISSUE.value: { "url", "state", "number", "assignee", "milestone", }, - Tablenames.INTEGRATION_PDF: {"file_path", "page", "total_pages", "title"}, - Tablenames.INTEGRATION_SHAREPOINT: { + Tablenames.INTEGRATION_SHAREPOINT.value: { "extension", "object_id", "parent_path", From 2a4899f3e89f5f7eef9c056e9a1b74d5f313382e Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 26 Jun 2025 20:03:10 +0200 Subject: [PATCH 095/114] perf: update integration task type name --- enums.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/enums.py b/enums.py index 3a102b7d..8dc02764 100644 --- a/enums.py +++ b/enums.py @@ -505,7 +505,7 @@ class TaskType(Enum): TASK_QUEUE_ACTION = "task_queue_action" RUN_COGNITION_MACRO = "RUN_COGNITION_MACRO" PARSE_COGNITION_FILE = "PARSE_COGNITION_FILE" - INTEGRATION = "INTEGRATION" + EXECUTE_INTEGRATION = "EXECUTE_INTEGRATION" class TaskQueueAction(Enum): From d57104ed703ead2ba4ca493a2ce68430ef26e34e Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 26 Jun 2025 21:19:17 +0200 Subject: [PATCH 096/114] style: formatting --- cognition_objects/integration.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index 15202741..4c34b1b1 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -243,7 +243,9 @@ def delete_many( .delete(synchronize_session=False) ) if delete_cognition_groups: - session.query(CognitionGroup).filter( - CognitionGroup.meta_data.op("->>")("integration_id").in_(ids) - ).delete(synchronize_session=False) + ( + session.query(CognitionGroup) + .filter(CognitionGroup.meta_data.op("->>")("integration_id").in_(ids)) + .delete(synchronize_session=False) + ) general.flush_or_commit(with_commit) From db484cfcbfc9bd633626935c2e4bffdae1bea13e Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 27 Jun 2025 12:30:59 +0200 Subject: [PATCH 097/114] perf: pr review comments --- business_objects/monitor.py | 18 +++++++++++++----- integration_objects/helper.py | 4 +++- integration_objects/manager.py | 6 +++--- util.py | 4 ++-- 4 files changed, 21 insertions(+), 11 deletions(-) diff --git a/business_objects/monitor.py b/business_objects/monitor.py index 5faaf353..e17d27c8 100644 --- a/business_objects/monitor.py +++ b/business_objects/monitor.py @@ -1,4 +1,5 @@ from typing import Any, List, Optional +import datetime from . import general from .. import enums from ..models import TaskQueue, Organization @@ -200,12 +201,19 @@ def set_parse_cognition_file_task_to_failed( def set_integration_task_to_failed( integration_id: str, - with_commit: bool = False, + is_synced: bool = False, + error_message: Optional[str] = None, + with_commit: bool = True, ) -> None: - integration = integration_db_bo.get_by_id(integration_id) - if integration: - integration.state = enums.CognitionMarkdownFileState.FAILED.value - general.flush_or_commit(with_commit) + integration_db_bo.update( + id=integration_id, + state=enums.CognitionMarkdownFileState.FAILED, + finished_at=datetime.datetime.now(datetime.timezone.utc), + is_synced=is_synced, + error_message=error_message, + last_synced_at=datetime.datetime.now(datetime.timezone.utc), + with_commit=with_commit, + ) def __select_running_information_source_payloads( diff --git a/integration_objects/helper.py b/integration_objects/helper.py index e513125f..d03a3a3a 100644 --- a/integration_objects/helper.py +++ b/integration_objects/helper.py @@ -1,3 +1,5 @@ +from typing import Set + from ..enums import Tablenames @@ -31,7 +33,7 @@ } -def get_supported_metadata_keys(table_name: str): +def get_supported_metadata_keys(table_name: str) -> Set[str]: """ Function for controlling and documenting the dynamic metadata fields associated with different integration types. diff --git a/integration_objects/manager.py b/integration_objects/manager.py index ca9683a6..a9e81aa1 100644 --- a/integration_objects/manager.py +++ b/integration_objects/manager.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Dict, Union, Type +from typing import List, Optional, Dict, Union, Type, Any from datetime import datetime from sqlalchemy import func from sqlalchemy.orm.attributes import flag_modified @@ -204,7 +204,7 @@ def clear_history( def get_supported_metadata( table_name: str, metadata: Dict[str, Union[str, int, float, bool]] -) -> None: +) -> Dict[str, Any]: supported_keys = get_supported_metadata_keys(table_name) supported_metadata = { key: metadata[key] for key in supported_keys.intersection(metadata.keys()) @@ -214,7 +214,7 @@ def get_supported_metadata( def __rename_metadata( table_name: str, metadata: Dict[str, Union[str, int, float, bool]] -) -> Dict[str, object]: +) -> Dict[str, Any]: rename_keys = { "id": f"{table_name}_id", "created_by": f"{table_name}_created_by", diff --git a/util.py b/util.py index 674a01cf..b36c745c 100644 --- a/util.py +++ b/util.py @@ -213,14 +213,14 @@ def to_json_serializable(x: Any): return x -def to_camel_case(name: str): +def to_camel_case(name: str) -> str: if is_camel_case(name): return name name = sub(r"(_|-)+", " ", name).title().replace(" ", "") return "".join([name[0].lower(), name[1:]]) -def to_snake_case(name: str): +def to_snake_case(name: str) -> str: # ref: https://stackoverflow.com/questions/1175208/elegant-python-function-to-convert-camelcase-to-snake-case if not is_camel_case(name): return name From 42b09fd25be9119c93d4e87866227089d0e0fc32 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 27 Jun 2025 15:41:38 +0200 Subject: [PATCH 098/114] perf: add REFINERY_ATTRIBUTE_ACCESS constants --- business_objects/project.py | 13 ++++++++++--- business_objects/record.py | 12 ++++++++++-- integration_objects/helper.py | 3 +++ 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/business_objects/project.py b/business_objects/project.py index fdf46d7d..48642a58 100644 --- a/business_objects/project.py +++ b/business_objects/project.py @@ -9,6 +9,10 @@ from .. import enums from ..session import session from ..models import Project, Record, Attribute +from ..integration_objects.helper import ( + REFINERY_ATTRIBUTE_ACCESS_GROUPS, + REFINERY_ATTRIBUTE_ACCESS_USERS, +) from ..util import prevent_sql_injection QUEUE_PROJECT_NAME = "@@HIDDEN_QUEUE_PROJECT@@" @@ -158,8 +162,9 @@ def get_all_with_access_management(organization_id: str) -> List[Project]: .join(Attribute, Project.id == Attribute.project_id) .filter( Project.organization_id == organization_id, - Attribute.name.in_(["__ACCESS_GROUPS", "__ACCESS_USERS"]), # - Attribute.name.in_(["__ACCESS_GROUPS", "__ACCESS_USERS"]), + Attribute.name.in_( + [REFINERY_ATTRIBUTE_ACCESS_GROUPS, REFINERY_ATTRIBUTE_ACCESS_USERS] + ), Attribute.user_created == False, Attribute.data_type == enums.DataTypes.PERMISSION.value, Attribute.state == enums.AttributeState.AUTOMATICALLY_CREATED.value, @@ -175,7 +180,9 @@ def check_access_management_active(project_id: str) -> bool: .join(Attribute, Project.id == Attribute.project_id) .filter( Project.id == project_id, - Attribute.name.in_(["__ACCESS_GROUPS", "__ACCESS_USERS"]), + Attribute.name.in_( + [REFINERY_ATTRIBUTE_ACCESS_GROUPS, REFINERY_ATTRIBUTE_ACCESS_USERS] + ), Attribute.user_created == False, Attribute.data_type == enums.DataTypes.PERMISSION.value, Attribute.state == enums.AttributeState.AUTOMATICALLY_CREATED.value, diff --git a/business_objects/record.py b/business_objects/record.py index db94bdf4..36572292 100644 --- a/business_objects/record.py +++ b/business_objects/record.py @@ -15,6 +15,10 @@ Attribute, RecordTokenized, ) +from ..integration_objects.helper import ( + REFINERY_ATTRIBUTE_ACCESS_GROUPS, + REFINERY_ATTRIBUTE_ACCESS_USERS, +) from ..session import session from ..util import prevent_sql_injection @@ -810,8 +814,12 @@ def delete_user_created_attribute( def delete_access_management_attributes( project_id: str, with_commit: bool = True ) -> None: - access_groups_attribute_item = attribute.get_by_name(project_id, "__ACCESS_GROUPS") - access_users_attribute_item = attribute.get_by_name(project_id, "__ACCESS_USERS") + access_groups_attribute_item = attribute.get_by_name( + project_id, REFINERY_ATTRIBUTE_ACCESS_GROUPS + ) + access_users_attribute_item = attribute.get_by_name( + project_id, REFINERY_ATTRIBUTE_ACCESS_USERS + ) if access_users_attribute_item and access_groups_attribute_item: record_items = get_all(project_id=project_id) diff --git a/integration_objects/helper.py b/integration_objects/helper.py index d03a3a3a..d61759fc 100644 --- a/integration_objects/helper.py +++ b/integration_objects/helper.py @@ -3,6 +3,9 @@ from ..enums import Tablenames +REFINERY_ATTRIBUTE_ACCESS_GROUPS = "" +REFINERY_ATTRIBUTE_ACCESS_USERS = "" + DEFAULT_METADATA = {"source", "minio_file_name", "running_id"} TABLE_METADATA = { Tablenames.INTEGRATION_PDF.value: {"file_path", "page", "total_pages", "title"}, From e0d1fb41787569faad82f4dcfdf4da17a4eddbf0 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 27 Jun 2025 16:54:06 +0200 Subject: [PATCH 099/114] fix: query builder for record_ids --- business_objects/embedding.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/business_objects/embedding.py b/business_objects/embedding.py index 0b770ca6..480b5006 100644 --- a/business_objects/embedding.py +++ b/business_objects/embedding.py @@ -324,7 +324,7 @@ def __build_payload_selector( and data_type != enums.DataTypes.PERMISSION.value ): payload_selector += f"'{attr}', (r.\"data\"->>'{attr}')::{data_type}" - if data_type == enums.DataTypes.PERMISSION.value: + if data_type == enums.DataTypes.PERMISSION.value: payload_selector += f"'{attr}', r.\"data\"->'{attr}'" else: payload_selector += f"'{attr}', r.\"data\"->>'{attr}'" @@ -394,7 +394,8 @@ def get_tensors_and_attributes_for_qdrant( WHERE et.project_id = '{project_id}' AND et.embedding_id = '{embedding_id}' """ if record_ids: - query += f" AND r.id IN ('{','.join(record_ids)}')" + _record_ids = "','".join(record_ids) + query += f" AND r.id IN ('{_record_ids}')" return general.execute_all(query) From fafeccd5daef4e9a4e617aeb4c1a309e3ef0011c Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Mon, 30 Jun 2025 23:24:39 +0200 Subject: [PATCH 100/114] perf: monitor.set_integration_task_to_failed with state --- business_objects/monitor.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/business_objects/monitor.py b/business_objects/monitor.py index e17d27c8..3e8b73ec 100644 --- a/business_objects/monitor.py +++ b/business_objects/monitor.py @@ -203,11 +203,13 @@ def set_integration_task_to_failed( integration_id: str, is_synced: bool = False, error_message: Optional[str] = None, + state: Optional[enums.CognitionMarkdownFileState] = None, with_commit: bool = True, ) -> None: + # argument `state` is a workaround for cognition-gateway/api/routes/integrations.delete_many integration_db_bo.update( id=integration_id, - state=enums.CognitionMarkdownFileState.FAILED, + state=state or enums.CognitionMarkdownFileState.FAILED, finished_at=datetime.datetime.now(datetime.timezone.utc), is_synced=is_synced, error_message=error_message, From 535e992e5ce36c223c7ee7103711a563b55b508a Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 1 Jul 2025 09:30:02 +0200 Subject: [PATCH 101/114] perf: add early exit for deleted integrations --- cognition_objects/integration.py | 4 +++- integration_objects/manager.py | 14 ++++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index 4c34b1b1..c2020a77 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -177,8 +177,10 @@ def update( is_synced: Optional[Union[str, bool]] = None, delta_criteria: Optional[Dict[str, str]] = None, with_commit: bool = True, -) -> CognitionIntegration: +) -> Optional[CognitionIntegration]: integration: CognitionIntegration = get_by_id(id) + if not integration: + return None if updated_by is not None: integration.updated_by = updated_by diff --git a/integration_objects/manager.py b/integration_objects/manager.py index a9e81aa1..34b7765f 100644 --- a/integration_objects/manager.py +++ b/integration_objects/manager.py @@ -124,7 +124,12 @@ def create( id: Optional[str] = None, with_commit: bool = True, **metadata, -) -> object: +) -> Optional[object]: + if not integration_db_bo.get_by_id(integration_id): + # If the integration does not exist, + # it was likely deleted during runtime + print(f"Integration with id '{integration_id}' not found", flush=True) + return integration_record = IntegrationModel( created_by=created_by, integration_id=integration_id, @@ -150,7 +155,12 @@ def update( error_message: Optional[str] = None, with_commit: bool = True, **metadata, -) -> object: +) -> Optional[object]: + if not integration_db_bo.get_by_id(integration_id): + # If the integration does not exist, + # it was likely deleted during runtime + print(f"Integration with id '{integration_id}' not found", flush=True) + return integration_record = get(IntegrationModel, integration_id, id) integration_record.updated_by = updated_by From ab273ca3ef00d52eb6f01b0d8e08d98ea7067643 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 1 Jul 2025 09:30:33 +0200 Subject: [PATCH 102/114] perf: add early exit for task execution --- cognition_objects/integration.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cognition_objects/integration.py b/cognition_objects/integration.py index 4c34b1b1..31c8b036 100644 --- a/cognition_objects/integration.py +++ b/cognition_objects/integration.py @@ -224,6 +224,8 @@ def update( def execution_finished(id: str) -> bool: + if not get_by_id(id): + return True return bool( session.query(CognitionIntegration) .filter( From 209fa9405a59d00472b32913a1e39b60c672ddc0 Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Tue, 1 Jul 2025 11:09:07 +0200 Subject: [PATCH 103/114] sharepoint active queue --- business_objects/project.py | 59 ++++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 21 deletions(-) diff --git a/business_objects/project.py b/business_objects/project.py index 48642a58..62dad1f6 100644 --- a/business_objects/project.py +++ b/business_objects/project.py @@ -1,11 +1,8 @@ from typing import List, Optional, Any, Dict, Union, Set -from sqlalchemy.sql import func -from sqlalchemy import cast, Integer +from sqlalchemy.sql import func, cast from sqlalchemy.sql.functions import coalesce - - +from sqlalchemy import Integer from . import general, attribute - from .. import enums from ..session import session from ..models import Project, Record, Attribute @@ -156,22 +153,42 @@ def get_all(organization_id: str) -> List[Project]: ) -def get_all_with_access_management(organization_id: str) -> List[Project]: - return ( - session.query(Project) - .join(Attribute, Project.id == Attribute.project_id) - .filter( - Project.organization_id == organization_id, - Attribute.name.in_( - [REFINERY_ATTRIBUTE_ACCESS_GROUPS, REFINERY_ATTRIBUTE_ACCESS_USERS] - ), - Attribute.user_created == False, - Attribute.data_type == enums.DataTypes.PERMISSION.value, - Attribute.state == enums.AttributeState.AUTOMATICALLY_CREATED.value, - ) - .distinct() - .all() - ) +def get_all_with_access_management(org_id: str) -> List[Dict[str, Any]]: + org_id_safe = prevent_sql_injection(org_id, isinstance(org_id, str)) + + hidden_status = enums.ProjectStatus.HIDDEN.value + permission_data_type = enums.DataTypes.PERMISSION.value + automatically_created_state = enums.AttributeState.AUTOMATICALLY_CREATED.value + access_groups_attr = REFINERY_ATTRIBUTE_ACCESS_GROUPS + access_users_attr = REFINERY_ATTRIBUTE_ACCESS_USERS + + query = f""" + SELECT DISTINCT + p.*, + CASE + WHEN + ci.id IS NOT NULL + AND (ci.config -> 'extract_kwargs' ->> 'sync_sharepoint_permissions')::text = 'true' + THEN TRUE + ELSE FALSE + END AS is_sharepoint_sync_active + FROM + public.project p + JOIN + public.attribute a ON p.id = a.project_id + LEFT JOIN + cognition.integration ci ON p.id = ci.project_id + WHERE + p.organization_id = '{org_id_safe}' + AND p.status != '{hidden_status}' + AND a.name IN ('{access_groups_attr}', '{access_users_attr}') + AND a.user_created = FALSE + AND a.data_type = '{permission_data_type}' + AND a.state = '{automatically_created_state}'; + """ + + values = general.execute_all(query) + return values def check_access_management_active(project_id: str) -> bool: From 5004048276a52d771ac7e1bb93ccc718c308a254 Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Tue, 1 Jul 2025 15:26:22 +0200 Subject: [PATCH 104/114] unique by name and integration --- cognition_objects/group.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cognition_objects/group.py b/cognition_objects/group.py index 6d14a5c5..727f84fe 100644 --- a/cognition_objects/group.py +++ b/cognition_objects/group.py @@ -65,12 +65,14 @@ def get_all_by_integration_id_permission_grouped( return integration_groups_by_permission -def get_by_name(organization_id: str, name: str): +def get_by_name_and_integration(organization_id: str, integration_id: str, name: str): + integration_id_json = CognitionGroup.meta_data.op("->>")("integration_id") return ( session.query(CognitionGroup) .filter( CognitionGroup.organization_id == organization_id, CognitionGroup.name == name, + integration_id_json == integration_id, ) .first() ) From 1ae14b572d6d9600b027287a9adc1b80aa81d0c6 Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Tue, 1 Jul 2025 18:12:51 +0200 Subject: [PATCH 105/114] typing --- cognition_objects/group.py | 4 +++- cognition_objects/group_member.py | 8 ++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/cognition_objects/group.py b/cognition_objects/group.py index 727f84fe..4deaca06 100644 --- a/cognition_objects/group.py +++ b/cognition_objects/group.py @@ -65,7 +65,9 @@ def get_all_by_integration_id_permission_grouped( return integration_groups_by_permission -def get_by_name_and_integration(organization_id: str, integration_id: str, name: str): +def get_by_name_and_integration( + organization_id: str, integration_id: str, name: str +) -> CognitionGroup: integration_id_json = CognitionGroup.meta_data.op("->>")("integration_id") return ( session.query(CognitionGroup) diff --git a/cognition_objects/group_member.py b/cognition_objects/group_member.py index 7122ec1e..4f5661b4 100644 --- a/cognition_objects/group_member.py +++ b/cognition_objects/group_member.py @@ -1,12 +1,12 @@ from datetime import datetime -from typing import Optional +from typing import Optional, List from ..business_objects import general, user from . import group from ..session import session from ..models import CognitionGroupMember -def get(group_id: str, id: str): +def get(group_id: str, id: str) -> CognitionGroupMember: return ( session.query(CognitionGroupMember) .filter( @@ -27,7 +27,7 @@ def get_by_group_and_user(group_id: str, user_id: str) -> CognitionGroupMember: ) -def get_by_user_id(user_id: str) -> list: +def get_by_user_id(user_id: str) -> List[CognitionGroupMember]: return ( session.query(CognitionGroupMember) .filter(CognitionGroupMember.user_id == user_id) @@ -35,7 +35,7 @@ def get_by_user_id(user_id: str) -> list: ) -def get_all_by_group(group_id: str) -> list: +def get_all_by_group(group_id: str) -> List[CognitionGroupMember]: return ( session.query(CognitionGroupMember) .filter(CognitionGroupMember.group_id == group_id) From 4d81dffba78d6cb164675c43fd28dc8932ebc020 Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Tue, 1 Jul 2025 18:22:38 +0200 Subject: [PATCH 106/114] improve sql --- business_objects/project.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/business_objects/project.py b/business_objects/project.py index 62dad1f6..a88a919c 100644 --- a/business_objects/project.py +++ b/business_objects/project.py @@ -165,13 +165,7 @@ def get_all_with_access_management(org_id: str) -> List[Dict[str, Any]]: query = f""" SELECT DISTINCT p.*, - CASE - WHEN - ci.id IS NOT NULL - AND (ci.config -> 'extract_kwargs' ->> 'sync_sharepoint_permissions')::text = 'true' - THEN TRUE - ELSE FALSE - END AS is_sharepoint_sync_active + COALESCE((ci.config -> 'extract_kwargs' ->> 'sync_sharepoint_permissions')::BOOLEAN,FALSE) AS is_sharepoint_sync_active FROM public.project p JOIN From 91cdd20a65acf748329e51a689e9135eeee71f75 Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Wed, 2 Jul 2025 17:04:17 +0200 Subject: [PATCH 107/114] merge --- cognition_objects/step_templates.py | 281 ++++++++++++++++++++++++++++ cognition_objects/strategy_step.py | 21 ++- enums.py | 5 + models.py | 21 ++- util.py | 42 ++++- 5 files changed, 359 insertions(+), 11 deletions(-) create mode 100644 cognition_objects/step_templates.py diff --git a/cognition_objects/step_templates.py b/cognition_objects/step_templates.py new file mode 100644 index 00000000..73604217 --- /dev/null +++ b/cognition_objects/step_templates.py @@ -0,0 +1,281 @@ +from typing import List, Dict, Any, Iterable, Optional + +from sqlalchemy.orm.attributes import flag_modified +from ..enums import StrategyStepType +from ..business_objects import general +from ..session import session +from ..models import StepTemplates +from ..util import prevent_sql_injection, sql_alchemy_to_dict + + +def get(organization_id: str, template_id: str) -> StepTemplates: + return ( + session.query(StepTemplates) + .filter( + StepTemplates.organization_id == organization_id, + StepTemplates.id == template_id, + ) + .first() + ) + + +def get_all_by_org_id(organization_id: str) -> List[Dict[str, Any]]: + values = [ + sql_alchemy_to_dict(st) + for st in ( + session.query(StepTemplates) + .filter( + StepTemplates.organization_id == organization_id, + ) + .order_by(StepTemplates.created_at.asc()) + .all() + ) + ] + organization_id = prevent_sql_injection(organization_id, isinstance(organization_id, str)) + query = f""" + SELECT jsonb_object_agg(id,C) + FROM ( + SELECT ss.config->>'templateId' id, COUNT(*)c + FROM cognition.strategy_step ss + INNER JOIN cognition.project p + ON ss.project_id = p.id + WHERE p.organization_id = '{organization_id}' + AND ss.step_type = '{StrategyStepType.TEMPLATED.value}' + GROUP BY 1 + )X + """ + template_counts = general.execute_first(query) + template_counts = ( + template_counts[0] if template_counts and template_counts[0] else {} + ) + + values = [ + {**s, "usage_count": template_counts.get(str(s["id"]), 0)} for s in values + ] + + return values + + +def get_all_by_user(organization_id: str, user_id: str) -> List[StepTemplates]: + return ( + session.query(StepTemplates) + .filter( + StepTemplates.organization_id == organization_id, + StepTemplates.created_by == user_id, + ) + .order_by(StepTemplates.created_at.asc()) + .all() + ) + + +# result structure: +# {: { +# "": { +# "strategy_name": , +# "order": , +# "steps": [ +# { +# "step_name": , +# "step_description": , +# "step_type": , +# "progress_text": , +# "execute_if_source_code": , +# "config": , +# "position": +# }, +# ... +# ] +# }, +# ... +# }} +def get_all_existing_steps_for_template_creation(org_id: str) -> Dict[str, Any]: + org_id = prevent_sql_injection(org_id, isinstance(org_id, str)) + query = f""" + SELECT + jsonb_object_agg(proj.project_id::text, proj.proj_json) AS all_projects + FROM ( + SELECT + p.id AS project_id, + jsonb_build_object( + 'project_name', p.name, + 'created_at', p.created_at, + 'strategies', + jsonb_object_agg( + s.id::text, + jsonb_build_object( + 'strategy_name', s.name, + 'order', s."order", + 'steps', + coalesce( + ( + SELECT jsonb_agg( + jsonb_build_object( + 'src_step_id', ss.id, + 'step_name', ss.name, + 'step_description', ss.description, + 'step_type', ss.step_type, + 'progress_text', ss.progress_text, + 'execute_if_source_code', ss.execute_if_source_code, + 'config', ss.config, + 'position', ss.position + ) + ORDER BY ss.position + ) + FROM cognition.strategy_step ss + WHERE ss.project_id = p.id + AND ss.strategy_id = s.id + AND ss.step_type != '{StrategyStepType.TEMPLATED.value}' + ), + '[]' + ) + ) + ) + ) AS proj_json + FROM cognition.project p + INNER JOIN cognition.strategy s + ON s.project_id = p.id + WHERE p.organization_id = '{org_id}' + GROUP BY p.id, p.name + ) AS proj; + """ + result = general.execute_first(query) + if result and result[0]: + return result[0] + return {} + + +def get_step_template_progress_text_lookup_for_strategy( + project_id: str, strategy_id: str, step_id: Optional[str] = None +) -> Dict[str, str]: + project_id = prevent_sql_injection(project_id, isinstance(project_id, str)) + strategy_id = prevent_sql_injection(strategy_id, isinstance(strategy_id, str)) + step_id_filter = "" + if step_id: + step_id = prevent_sql_injection(step_id, isinstance(step_id, str)) + step_id_filter = f"AND ss.id = '{step_id}'" + query = f""" + WITH base AS ( + SELECT + st.config AS config, + ss.config->'variableValues' AS variableValues, + ss.id step_id + FROM cognition.strategy_step ss + INNER JOIN cognition.project p + ON ss.project_id = p.id + INNER JOIN cognition.step_templates st + ON st.id = (ss.config->>'templateId')::UUID + AND st.organization_id = p.organization_id + WHERE + ss.project_id = '{project_id}' + AND ss.strategy_id = '{strategy_id}' + AND ss.step_type = '{StrategyStepType.TEMPLATED.value}' + {step_id_filter} + ) + + + SELECT jsonb_object_agg(step_id,progress_lookup) + FROM ( + SELECT + step_id, + array_agg(jsonb_build_object('template_key',dict_key,'progress_text',resolved_progress_text)) progress_lookup + FROM ( + SELECT + -- Extract one step-object at a time + step_id, + (step_item ->> 'stepName') AS step_name, + step_item->>'stepType'|| '@' || (step_item->>'srcStepId')::TEXT AS dict_key, + -- Raw progressText from the JSON + (step_item ->> 'progressText') AS raw_progress_text, + -- If raw_progress_text matches @@var_@@, replace via variableValues + CASE + WHEN (step_item ->> 'progressText') ~ '^@@var_[0-9a-fA-F\-]{{36}}@@$' + THEN + -- Extract the UUID between "var_" and "@@" + ( + SELECT variableValues ->> inner_uuid + FROM ( + SELECT + regexp_replace(step_item ->> 'progressText', + '^@@var_([0-9a-fA-F\-]{{36}})@@$', + '\\1') AS inner_uuid + ) AS sub + ) + ELSE + (step_item ->> 'progressText') + END AS resolved_progress_text + FROM base + -- Unnest the steps array + CROSS JOIN LATERAL json_array_elements(base.config->'steps') AS step_item + )x + GROUP BY 1 + )y + """ + result = general.execute_first(query) + if result and result[0]: + return result[0] + return {} + + +def create( + org_id: str, + user_id: str, + name: str, + description: str, + config: Dict[str, Any], + with_commit: bool = True, +) -> StepTemplates: + template: StepTemplates = StepTemplates( + organization_id=org_id, + name=name, + description=description, + created_by=user_id, + config=config, + ) + general.add(template, with_commit) + + return template + + +def update( + org_id: str, + template_id: str, + name: Optional[str] = None, + description: Optional[str] = None, + config: Optional[Dict[str, Any]] = None, + with_commit: bool = True, +) -> StepTemplates: + template = get(org_id, template_id) + if not template: + raise ValueError( + f"Template with ID {template_id} not found in organization {org_id}." + ) + + if name is not None: + template.name = name + if description is not None: + template.description = description + if config is not None: + template.config = config + flag_modified(template, "config") + + general.flush_or_commit(with_commit) + + return template + + +def delete(org_id: str, template_id: str, with_commit: bool = True) -> None: + session.query(StepTemplates).filter( + StepTemplates.organization_id == org_id, + StepTemplates.id == template_id, + ).delete() + general.flush_or_commit(with_commit) + + +def delete_many( + org_id: str, template_ids: Iterable[str], with_commit: bool = True +) -> None: + session.query(StepTemplates).filter( + StepTemplates.organization_id == org_id, + StepTemplates.id.in_(template_ids), + ).delete() + general.flush_or_commit(with_commit) diff --git a/cognition_objects/strategy_step.py b/cognition_objects/strategy_step.py index 8aa976a2..bf3b2d92 100644 --- a/cognition_objects/strategy_step.py +++ b/cognition_objects/strategy_step.py @@ -1,6 +1,8 @@ -from typing import List, Optional, Dict, Any +from typing import List, Optional, Dict, Any, Iterable, Tuple from datetime import datetime from sqlalchemy.orm.attributes import flag_modified +from sqlalchemy import tuple_ + from ..business_objects import general from ..session import session @@ -19,6 +21,23 @@ def get(project_id: str, strategy_step_id: str) -> CognitionStrategyStep: ) +def get_all_by_project_and_ids( + project_step_tuple: Iterable[Tuple[str, str]], +) -> List[CognitionStrategyStep]: + if not project_step_tuple: + return [] + + return ( + session.query(CognitionStrategyStep) + .filter( + tuple_(CognitionStrategyStep.project_id, CognitionStrategyStep.id).in_( + project_step_tuple + ) + ) + .all() + ) + + def get_all_by_strategy_id( project_id: str, strategy_id: str ) -> List[CognitionStrategyStep]: diff --git a/enums.py b/enums.py index 8dc02764..b9a9461d 100644 --- a/enums.py +++ b/enums.py @@ -167,6 +167,7 @@ class Tablenames(Enum): INTEGRATION_GITHUB_ISSUE = "github_issue" INTEGRATION_PDF = "pdf" INTEGRATION_SHAREPOINT = "sharepoint" + STEP_TEMPLATES = "step_templates" # templates for strategy steps def snake_case_to_pascal_case(self): # the type name (written in PascalCase) of a table is needed to create backrefs @@ -557,6 +558,7 @@ class StrategyStepType(Enum): NEURAL_SEARCH = "NEURAL_SEARCH" WEBHOOK = "WEBHOOK" GRAPHRAG_SEARCH = "GRAPHRAG_SEARCH" + TEMPLATED = "TEMPLATED" def get_description(self): return STEP_DESCRIPTIONS.get(self, "No description available") @@ -584,6 +586,7 @@ def get_progress_text(self): StrategyStepType.CALL_OTHER_AGENT: "Retrieve results from other agents", StrategyStepType.WEBHOOK: "Webhook", StrategyStepType.GRAPHRAG_SEARCH: "Query GraphRAG index", + StrategyStepType.TEMPLATED: "Templated step", } STEP_WHEN_TO_USE = { @@ -601,6 +604,7 @@ def get_progress_text(self): StrategyStepType.CALL_OTHER_AGENT: "When you want to call another agent", StrategyStepType.WEBHOOK: "When you want to run a webhook", StrategyStepType.GRAPHRAG_SEARCH: "When you want to query a knowledge graph", + StrategyStepType.TEMPLATED: "When you want to reuse existing templates", } STEP_PROGRESS_TEXTS = { @@ -619,6 +623,7 @@ def get_progress_text(self): StrategyStepType.CALL_OTHER_AGENT: "Calling another agent", StrategyStepType.WEBHOOK: "Running webhook", StrategyStepType.GRAPHRAG_SEARCH: "Querying knowledge graph", + StrategyStepType.TEMPLATED: "Running templated step", } STEP_ERRORS = { diff --git a/models.py b/models.py index 858b192b..ab02a204 100644 --- a/models.py +++ b/models.py @@ -1934,16 +1934,31 @@ class GraphRAGIndex(Base): root_dir = Column(String) -class CognitionGroup(Base): - __tablename__ = Tablenames.GROUP.value +class StepTemplates(Base): + __tablename__ = Tablenames.STEP_TEMPLATES.value __table_args__ = {"schema": "cognition"} + name = Column(String, unique=True) id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) organization_id = Column( UUID(as_uuid=True), ForeignKey(f"{Tablenames.ORGANIZATION.value}.id", ondelete="CASCADE"), index=True, ) - name = Column(String, unique=True) + config = Column(JSON) # JSON schema for the step template + # config contains all step configurations in an array & variable fields to be changed on useage + # e.g. + # { + # "variables": [ + # {"name": "Env var", "path": "[0].config.llmConfig.environmentVariable", "hasDefault": True, "defaultValue": "OpenAI Leo"}, + # {"name": "System Prompt", "path": "[0].config.templatePrompt", "hasDefault": False}, + # ], + # "steps": [{...},{...}] + # } + + +class CognitionGroup(Base): + __tablename__ = Tablenames.GROUP.value + name = Column(String) description = Column(String) created_at = Column(DateTime, default=sql.func.now()) created_by = Column( diff --git a/util.py b/util.py index b36c745c..d2ef304f 100644 --- a/util.py +++ b/util.py @@ -2,7 +2,7 @@ from typing import Tuple, Any, Union, List, Dict, Optional, Iterable from pydantic import BaseModel from collections.abc import Iterable as collections_abc_Iterable -from re import sub, match, compile +from re import sub, match, compile, IGNORECASE import sqlalchemy import decimal from uuid import UUID @@ -19,6 +19,10 @@ compile(r"(.)([A-Z][a-z]+)"), compile(r"([a-z0-9])([A-Z])"), ] +UUID_REGEX_PATTERN = compile( + r"^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + IGNORECASE, +) STRING_TRUE_VALUES = {"true", "x", "1", "y"} @@ -109,12 +113,13 @@ def sql_alchemy_to_dict( column_whitelist: Optional[Iterable[str]] = None, column_blacklist: Optional[Iterable[str]] = None, column_rename_map: Optional[Dict[str, str]] = None, + dont_wrap_uuids: bool = True, ): result = __sql_alchemy_to_dict( sql_alchemy_object, column_whitelist, column_blacklist, column_rename_map ) if for_frontend: - return to_frontend_obj(result) + return to_frontend_obj(result, dont_wrap_uuids=dont_wrap_uuids) return result @@ -175,18 +180,29 @@ def rename_columns(data: Any) -> Any: return sql_alchemy_object -def to_frontend_obj(value: Union[List, Dict], blacklist_keys: List[str] = []): +def to_frontend_obj( + value: Union[List, Dict], + blacklist_keys: List[str] = [], + dont_wrap_uuids: bool = True, +): if isinstance(value, dict): return { - to_camel_case(k): ( - to_frontend_obj(v, blacklist_keys=blacklist_keys) + to_camel_case(k, dont_wrap_uuids=dont_wrap_uuids): ( + to_frontend_obj( + v, blacklist_keys=blacklist_keys, dont_wrap_uuids=dont_wrap_uuids + ) if k not in blacklist_keys else v ) for k, v in value.items() } elif is_list_like(value): - return [to_frontend_obj(x, blacklist_keys=blacklist_keys) for x in value] + return [ + to_frontend_obj( + x, blacklist_keys=blacklist_keys, dont_wrap_uuids=dont_wrap_uuids + ) + for x in value + ] else: return to_json_serializable(value) @@ -213,9 +229,11 @@ def to_json_serializable(x: Any): return x -def to_camel_case(name: str) -> str: +def to_camel_case(name: str, dont_wrap_uuids: bool = True): if is_camel_case(name): return name + if dont_wrap_uuids and is_uuid(name): + return name name = sub(r"(_|-)+", " ", name).title().replace(" ", "") return "".join([name[0].lower(), name[1:]]) @@ -246,6 +264,16 @@ def is_camel_case(text: str) -> bool: return False +def is_uuid(value: Union[str, UUID]) -> bool: + if isinstance(value, UUID): + return True + elif isinstance(value, str): + if UUID_REGEX_PATTERN.fullmatch(value): + return True + return False + return False + + # str is expected but depending on the attack vector e.g. the type hints don't mean anything so an int could still receive a string # the idea is that every directly inserted variable (e.g. project_id) is run through this function before being used in a plain text query # orm model is sufficient for most cases but for raw queries we mask all directly included variables From 36a48c7f5f5d40a0bc7b773f4829052857a9a200 Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Wed, 2 Jul 2025 17:04:22 +0200 Subject: [PATCH 108/114] model --- models.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/models.py b/models.py index ab02a204..75c5e09d 100644 --- a/models.py +++ b/models.py @@ -1937,13 +1937,20 @@ class GraphRAGIndex(Base): class StepTemplates(Base): __tablename__ = Tablenames.STEP_TEMPLATES.value __table_args__ = {"schema": "cognition"} - name = Column(String, unique=True) id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) organization_id = Column( UUID(as_uuid=True), ForeignKey(f"{Tablenames.ORGANIZATION.value}.id", ondelete="CASCADE"), index=True, ) + name = Column(String) + description = Column(String) + created_at = Column(DateTime, default=sql.func.now()) + created_by = Column( + UUID(as_uuid=True), + ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET NULL"), + index=True, + ) config = Column(JSON) # JSON schema for the step template # config contains all step configurations in an array & variable fields to be changed on useage # e.g. From 5ef1483859cfea9e5bbc5de25fad8e90aeb09bc2 Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Wed, 2 Jul 2025 17:11:24 +0200 Subject: [PATCH 109/114] fix group --- models.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/models.py b/models.py index 75c5e09d..586579b4 100644 --- a/models.py +++ b/models.py @@ -1965,7 +1965,14 @@ class StepTemplates(Base): class CognitionGroup(Base): __tablename__ = Tablenames.GROUP.value - name = Column(String) + __table_args__ = {"schema": "cognition"} + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + organization_id = Column( + UUID(as_uuid=True), + ForeignKey(f"{Tablenames.ORGANIZATION.value}.id", ondelete="CASCADE"), + index=True, + ) + name = Column(String, unique=True) description = Column(String) created_at = Column(DateTime, default=sql.func.now()) created_by = Column( From e38a00732c88cfaeb7f2aa30ccb0d69849d38427 Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Thu, 3 Jul 2025 11:36:06 +0200 Subject: [PATCH 110/114] remove unique --- models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models.py b/models.py index 586579b4..fa9ad548 100644 --- a/models.py +++ b/models.py @@ -1972,7 +1972,7 @@ class CognitionGroup(Base): ForeignKey(f"{Tablenames.ORGANIZATION.value}.id", ondelete="CASCADE"), index=True, ) - name = Column(String, unique=True) + name = Column(String) description = Column(String) created_at = Column(DateTime, default=sql.func.now()) created_by = Column( From 71c9603e0d64c7254e02d656df048b2fbb1c2ddd Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 3 Jul 2025 15:44:19 +0200 Subject: [PATCH 111/114] perf: add file_properties integration column --- integration_objects/helper.py | 1 + models.py | 1 + 2 files changed, 2 insertions(+) diff --git a/integration_objects/helper.py b/integration_objects/helper.py index d61759fc..5ac51b73 100644 --- a/integration_objects/helper.py +++ b/integration_objects/helper.py @@ -32,6 +32,7 @@ "mime_type", "hashes", "permissions", + "file_properties", }, } diff --git a/models.py b/models.py index fa9ad548..57c891f5 100644 --- a/models.py +++ b/models.py @@ -2387,3 +2387,4 @@ class IntegrationSharepoint(Base): mime_type = Column(String) hashes = Column(JSON) permissions = Column(JSON) + file_properties = Column(JSON) From 602baf9f1cce5f6cb8ff270bc48f9b4984f255ed Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 3 Jul 2025 15:54:50 +0200 Subject: [PATCH 112/114] perf: update default state for set_integration_task_to_failed --- business_objects/monitor.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/business_objects/monitor.py b/business_objects/monitor.py index 3e8b73ec..540741a8 100644 --- a/business_objects/monitor.py +++ b/business_objects/monitor.py @@ -203,13 +203,15 @@ def set_integration_task_to_failed( integration_id: str, is_synced: bool = False, error_message: Optional[str] = None, - state: Optional[enums.CognitionMarkdownFileState] = None, + state: Optional[ + enums.CognitionMarkdownFileState + ] = enums.CognitionMarkdownFileState.FAILED, with_commit: bool = True, ) -> None: # argument `state` is a workaround for cognition-gateway/api/routes/integrations.delete_many integration_db_bo.update( id=integration_id, - state=state or enums.CognitionMarkdownFileState.FAILED, + state=state, finished_at=datetime.datetime.now(datetime.timezone.utc), is_synced=is_synced, error_message=error_message, From 06f7509b0260b4f95110309944231e771b5b465d Mon Sep 17 00:00:00 2001 From: JWittmeyer Date: Thu, 3 Jul 2025 17:18:42 +0200 Subject: [PATCH 113/114] Adds option filter for pid --- business_objects/project.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/business_objects/project.py b/business_objects/project.py index a88a919c..26cc8d7a 100644 --- a/business_objects/project.py +++ b/business_objects/project.py @@ -116,12 +116,18 @@ def __build_sql_data_slices_by_project(project_id: str) -> str: project.id = '{project_id}'::UUID; """ -def get_dropdown_list_project_list(org_id: str) -> List[Dict[str, str]]: +def get_dropdown_list_project_list( + org_id: str, project_id: Optional[str] = None +) -> List[Dict[str, str]]: org_id = prevent_sql_injection(org_id, isinstance(org_id, str)) + prj_filter = "" + if project_id: + project_id = prevent_sql_injection(project_id, isinstance(project_id, str)) + prj_filter = f"AND p.id = '{project_id}'" query = f""" SELECT array_agg(jsonb_build_object('value', p.id,'name',p.NAME)) FROM public.project p - WHERE p.organization_id = '{org_id}' AND p.status != '{enums.ProjectStatus.HIDDEN.value}' + WHERE p.organization_id = '{org_id}' AND p.status != '{enums.ProjectStatus.HIDDEN.value}' {prj_filter} """ values = general.execute_first(query) From 9c2f2a1bc386567fb9123d46a74f203fcb0b4ced Mon Sep 17 00:00:00 2001 From: JWittmeyer Date: Thu, 3 Jul 2025 17:26:57 +0200 Subject: [PATCH 114/114] PR comments --- cognition_objects/group.py | 7 ++----- integration_objects/manager.py | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/cognition_objects/group.py b/cognition_objects/group.py index 4deaca06..75dac238 100644 --- a/cognition_objects/group.py +++ b/cognition_objects/group.py @@ -58,11 +58,8 @@ def get_all_by_integration_id_permission_grouped( ) .all() ) - integration_groups_by_permission = {} - for group in integration_groups: - permission_id = group.meta_data.get("permission_id") - integration_groups_by_permission[permission_id] = group - return integration_groups_by_permission + + return {group.meta_data.get("permission_id"): group for group in integration_groups} def get_by_name_and_integration( diff --git a/integration_objects/manager.py b/integration_objects/manager.py index 34b7765f..7979678f 100644 --- a/integration_objects/manager.py +++ b/integration_objects/manager.py @@ -13,7 +13,7 @@ def get( IntegrationModel: Type, integration_id: str, id: Optional[str] = None, -) -> object: +) -> Union[List[object], object]: query = session.query(IntegrationModel).filter( IntegrationModel.integration_id == integration_id, )