opsmill
diff --git a/‎backend/infrahub/cli/db.py‎
Lines changed: 24 additions & 0 deletions b/‎backend/infrahub/cli/db.py‎
Lines changed: 24 additions & 0 deletions
diff --git a/‎backend/infrahub/cli/db_commands/clean_duplicate_schema_fields.py‎
Lines changed: 212 additions & 0 deletions b/‎backend/infrahub/cli/db_commands/clean_duplicate_schema_fields.py‎
Lines changed: 212 additions & 0 deletions
diff --git a/‎backend/infrahub/core/schema/definitions/internal.py‎
Lines changed: 4 additions & 0 deletions b/‎backend/infrahub/core/schema/definitions/internal.py‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎backend/infrahub/core/validators/determiner.py‎
Lines changed: 12 additions & 1 deletion b/‎backend/infrahub/core/validators/determiner.py‎
Lines changed: 12 additions & 1 deletion
@@ -54,6 +54,7 @@
 
 from .constants import ERROR_BADGE, FAILED_BADGE, SUCCESS_BADGE
 from .db_commands.check_inheritance import check_inheritance
+from .db_commands.clean_duplicate_schema_fields import clean_duplicate_schema_fields
 from .patch import patch_app
 
 
@@ -200,6 +201,29 @@ async def check_inheritance_cmd(
     await dbdriver.close()
 
 
+@app.command(name="check-duplicate-schema-fields")
+async def check_duplicate_schema_fields_cmd(
+    ctx: typer.Context,
+    fix: bool = typer.Option(False, help="Fix the duplicate schema fields on the default branch."),
+    config_file: str = typer.Argument("infrahub.toml", envvar="INFRAHUB_CONFIG"),
+) -> None:
+    """Check for any duplicate schema attributes or relationships on the default branch"""
+    # NOTE: the docstring above is left untouched on purpose; Typer uses it as the command's help text.
+    #
+    # Raises typer.Exit(code=1) when clean_duplicate_schema_fields() returns False, which it does when
+    # duplicates are found and --fix was not given.
+
+    # Raise the log thresholds so the command output is limited to the report printed by the check.
+    logging.getLogger("infrahub").setLevel(logging.WARNING)
+    logging.getLogger("neo4j").setLevel(logging.ERROR)
+    logging.getLogger("prefect").setLevel(logging.ERROR)
+
+    config.load_and_exit(config_file_name=config_file)
+
+    context: CliContext = ctx.obj
+    dbdriver = await context.init_db(retry=1)
+
+    try:
+        success = await clean_duplicate_schema_fields(db=dbdriver, fix=fix)
+    finally:
+        # Close the driver on every path: previously the failure exit (and any exception raised by
+        # the check) skipped this call and left the database driver open.
+        await dbdriver.close()
+
+    if not success:
+        raise typer.Exit(code=1)
+
+
 @app.command(name="update-core-schema")
 async def update_core_schema_cmd(
     ctx: typer.Context,
 
@@ -0,0 +1,212 @@
+from dataclasses import dataclass
+from enum import Enum
+from typing import Any
+
+from rich import print as rprint
+from rich.console import Console
+from rich.table import Table
+
+from infrahub.cli.constants import FAILED_BADGE, SUCCESS_BADGE
+from infrahub.core.query import Query, QueryType
+from infrahub.database import InfrahubDatabase
+
+
+class SchemaFieldType(str, Enum):
+    """Kind of schema field reported as duplicated: an attribute or a relationship."""
+
+    # Members are str-valued; the value is what display_duplicate_schema_fields() prints in the table.
+    ATTRIBUTE = "attribute"
+    RELATIONSHIP = "relationship"
+
+
+@dataclass
+class SchemaFieldDetails:
+    """One duplicated field, identified by the schema it belongs to and the field's name and type."""
+
+    # Schema namespace concatenated with the schema name (built by the GetDuplicateSchemaFields query).
+    schema_kind: str
+    # The `uuid` property of the schema vertex that owns the duplicated field.
+    schema_uuid: str
+    # Whether the duplicated field is an attribute or a relationship.
+    field_type: SchemaFieldType
+    # Value of the field's "name" attribute.
+    field_name: str
+
+
+class DuplicateSchemaFields(Query):
+    """
+    Shared query prefix that locates duplicated schema fields on the default branch.
+
+    The prefix has no RETURN clause; subclasses call `super().query_init()` and then append their own
+    clauses. The variables left in scope for them are `default_branch`, `field_name`, `is_attribute`,
+    `schema_vertex` and `fields_reverse_chron`.
+    """
+
+    async def query_init(self, db: InfrahubDatabase, **kwargs: dict[str, Any]) -> None:  # noqa: ARG002
+        # Steps performed by the Cypher below:
+        #   1. Read the default branch name from the Root vertex.
+        #   2. For every SchemaAttribute / SchemaRelationship vertex, take its most recent IS_PART_OF
+        #      edge on the default branch and keep the field only when that edge is active and has no
+        #      `to` value; the edge's `from` becomes `active_from`.
+        #   3. Look up the field's current "name" attribute value on the default branch (`field_name`).
+        #   4. Find the SchemaNode / SchemaGeneric vertex linked to the field through a Relationship
+        #      named "schema__node__relationships" or "schema__node__attributes" (`schema_vertex`).
+        #   5. Order by `active_from` descending, then group by (field name, attribute-or-relationship,
+        #      schema vertex) and keep only the groups that contain more than one field vertex.
+        query = """
+MATCH (root:Root)
+LIMIT 1
+WITH root.default_branch AS default_branch
+MATCH (field:SchemaAttribute|SchemaRelationship)
+CALL (default_branch, field) {
+    MATCH (field)-[is_part_of:IS_PART_OF]->(:Root)
+    WHERE is_part_of.branch = default_branch
+    ORDER BY is_part_of.from DESC
+    RETURN is_part_of
+    LIMIT 1
+}
+WITH default_branch, field, CASE
+    WHEN is_part_of.status = "active" AND is_part_of.to IS NULL THEN is_part_of.from
+    ELSE NULL
+END AS active_from
+WHERE active_from IS NOT NULL
+WITH default_branch, field, active_from, "SchemaAttribute" IN labels(field) AS is_attribute
+CALL (field, default_branch) {
+    MATCH (field)-[r1:HAS_ATTRIBUTE]->(:Attribute {name: "name"})-[r2:HAS_VALUE]->(name_value:AttributeValue)
+    WHERE r1.branch = default_branch AND r2.branch = default_branch
+    AND r1.status = "active" AND r2.status = "active"
+    AND r1.to IS NULL AND r2.to IS NULL
+    ORDER BY r1.from DESC, r1.status ASC, r2.from DESC, r2.status ASC
+    LIMIT 1
+    RETURN name_value.value AS field_name
+}
+CALL (field, default_branch) {
+    MATCH (field)-[r1:IS_RELATED]-(rel:Relationship)-[r2:IS_RELATED]-(peer:SchemaNode|SchemaGeneric)
+    WHERE rel.name IN ["schema__node__relationships", "schema__node__attributes"]
+    AND r1.branch = default_branch AND r2.branch = default_branch
+    AND r1.status = "active" AND r2.status = "active"
+    AND r1.to IS NULL AND r2.to IS NULL
+    ORDER BY r1.from DESC, r1.status ASC, r2.from DESC, r2.status ASC
+    LIMIT 1
+    RETURN peer AS schema_vertex
+}
+WITH default_branch, field, field_name, is_attribute, active_from, schema_vertex
+ORDER BY active_from DESC
+WITH default_branch, field_name, is_attribute, schema_vertex, collect(field) AS fields_reverse_chron
+WHERE size(fields_reverse_chron) > 1
+        """
+        self.add_to_query(query)
+
+
+class GetDuplicateSchemaFields(DuplicateSchemaFields):
+    """
+    Read-only report of duplicated schema fields on the default branch.
+
+    For each group of duplicates found by the parent query, returns the owning schema's kind
+    (namespace + name), the schema vertex UUID, the field name and whether the field is an attribute.
+    """
+
+    name = "get_duplicate_schema_fields"
+    type = QueryType.READ
+    insert_return = False
+
+    async def query_init(self, db: InfrahubDatabase, **kwargs: dict[str, Any]) -> None:
+        # The parent contributes the duplicate-detection prefix; the RETURN clause is appended here.
+        await super().query_init(db=db, **kwargs)
+        report_clause = """
+CALL (schema_vertex, default_branch) {
+    MATCH (schema_vertex)-[r1:HAS_ATTRIBUTE]->(:Attribute {name: "namespace"})-[r2:HAS_VALUE]->(name_value:AttributeValue)
+    WHERE r1.branch = default_branch AND r2.branch = default_branch
+    ORDER BY r1.from DESC, r1.status ASC, r2.from DESC, r2.status ASC
+    LIMIT 1
+    RETURN name_value.value AS schema_namespace
+}
+CALL (schema_vertex, default_branch) {
+    MATCH (schema_vertex)-[r1:HAS_ATTRIBUTE]->(:Attribute {name: "name"})-[r2:HAS_VALUE]->(name_value:AttributeValue)
+    WHERE r1.branch = default_branch AND r2.branch = default_branch
+    ORDER BY r1.from DESC, r1.status ASC, r2.from DESC, r2.status ASC
+    LIMIT 1
+    RETURN name_value.value AS schema_name
+}
+RETURN schema_namespace + schema_name AS schema_kind, schema_vertex.uuid AS schema_uuid, field_name, is_attribute
+ORDER BY schema_kind ASC, is_attribute DESC, field_name ASC
+        """
+        self.return_labels = ["schema_kind", "schema_uuid", "field_name", "is_attribute"]
+        self.add_to_query(report_clause)
+
+    def get_schema_field_details(self) -> list[SchemaFieldDetails]:
+        """Convert every result row into a SchemaFieldDetails, preserving the query's row order."""
+        return [
+            SchemaFieldDetails(
+                schema_kind=row.get_as_type(label="schema_kind", return_type=str),
+                schema_uuid=row.get_as_type(label="schema_uuid", return_type=str),
+                field_name=row.get_as_type(label="field_name", return_type=str),
+                field_type=(
+                    SchemaFieldType.ATTRIBUTE
+                    if row.get_as_type(label="is_attribute", return_type=bool)
+                    else SchemaFieldType.RELATIONSHIP
+                ),
+            )
+            for row in self.results
+        ]
+
+
+class FixDuplicateSchemaFields(DuplicateSchemaFields):
+    """
+    Fix the duplicate schema fields by hard deleting the earlier duplicate(s)
+
+    Within each group of duplicates, the first entry of `fields_reverse_chron` is kept and every other
+    entry (`tail(...)`) is removed from the default branch.
+    """
+
+    name = "fix_duplicate_schema_fields"
+    type = QueryType.WRITE
+    insert_return = False
+
+    async def query_init(self, db: InfrahubDatabase, **kwargs: dict[str, Any]) -> None:
+        await super().query_init(db=db, **kwargs)
+        # For every field to delete, the Cypher below (restricted to edges on the default branch):
+        #   1. deletes the field's IS_PART_OF edges;
+        #   2. for each Relationship vertex reached through IS_RELATED, deletes that vertex's edges
+        #      and then the vertex itself when it has no edges left;
+        #   3. for each Attribute vertex reached through HAS_ATTRIBUTE, deletes that vertex's edges
+        #      and then the vertex itself when it has no edges left;
+        #   4. deletes the field vertex when it has no edges left.
+        # Vertices that still carry edges (e.g. on other branches) are therefore left in place.
+        # NOTE(review): steps 2 and 3 use plain MATCH rather than OPTIONAL MATCH, so a field without
+        # IS_RELATED or HAS_ATTRIBUTE edges on the default branch would presumably yield no rows and
+        # skip the remaining steps -- confirm this cannot happen for the fields selected here.
+        query = """
+WITH default_branch, tail(fields_reverse_chron) AS fields_to_delete
+UNWIND fields_to_delete AS field_to_delete
+CALL (field_to_delete, default_branch) {
+    MATCH (field_to_delete)-[r:IS_PART_OF {branch: default_branch}]-()
+    DELETE r
+    WITH field_to_delete
+    MATCH (field_to_delete)-[:IS_RELATED {branch: default_branch}]-(rel:Relationship)
+    WITH DISTINCT field_to_delete, rel
+    MATCH (rel)-[r {branch: default_branch}]-()
+    DELETE r
+    WITH field_to_delete, rel
+    OPTIONAL MATCH (rel)
+    WHERE NOT exists((rel)--())
+    DELETE rel
+    WITH DISTINCT field_to_delete
+    MATCH (field_to_delete)-[:HAS_ATTRIBUTE {branch: default_branch}]->(attr:Attribute)
+    MATCH (attr)-[r {branch: default_branch}]-()
+    DELETE r
+    WITH field_to_delete, attr
+    OPTIONAL MATCH (attr)
+    WHERE NOT exists((attr)--())
+    DELETE attr
+    WITH DISTINCT field_to_delete
+    OPTIONAL MATCH (field_to_delete)
+    WHERE NOT exists((field_to_delete)--())
+    DELETE field_to_delete
+}
+        """
+        self.add_to_query(query)
+
+
+def display_duplicate_schema_fields(duplicate_schema_fields: list[SchemaFieldDetails]) -> None:
+    """Print the duplicated schema fields as a rich table, one row per field."""
+    report = Table(title="Duplicate Schema Fields on Default Branch")
+    for heading in ("Schema Kind", "Schema UUID", "Field Name", "Field Type"):
+        report.add_column(heading)
+
+    for entry in duplicate_schema_fields:
+        # field_type is an enum; its string value is what gets rendered.
+        report.add_row(entry.schema_kind, entry.schema_uuid, entry.field_name, entry.field_type.value)
+
+    Console().print(report)
+
+
+async def clean_duplicate_schema_fields(db: InfrahubDatabase, fix: bool = False) -> bool:
+    """
+    Report, and optionally delete, attributes or relationships duplicated in a schema on the default branch
+
+    Returns True when no duplicates exist or when they were deleted (fix=True), and False when
+    duplicates were found but fix is False.
+    """
+
+    finder = await GetDuplicateSchemaFields.init(db=db)
+    await finder.execute(db=db)
+    duplicates = finder.get_schema_field_details()
+
+    if duplicates:
+        display_duplicate_schema_fields(duplicates)
+
+        if fix:
+            # Hard delete the earlier duplicate(s) with a dedicated write query.
+            fixer = await FixDuplicateSchemaFields.init(db=db)
+            await fixer.execute(db=db)
+            rprint(f"{SUCCESS_BADGE} Duplicate schema fields deleted from the default branch")
+            return True
+
+        rprint(f"{FAILED_BADGE} Use the --fix flag to fix the duplicate schema fields")
+        return False
+
+    rprint(f"{SUCCESS_BADGE} No duplicate schema fields found")
+    return True
@@ -180,6 +180,7 @@ class SchemaNode(BaseModel):
     attributes: list[SchemaAttribute]
     relationships: list[SchemaRelationship]
     display_labels: list[str]
+    uniqueness_constraints: list[list[str]] | None = None
 
     def to_dict(self) -> dict[str, Any]:
         return {
@@ -195,6 +196,7 @@ def to_dict(self) -> dict[str, Any]:
             ],
             "relationships": [relationship.to_dict() for relationship in self.relationships],
             "display_labels": self.display_labels,
+            "uniqueness_constraints": self.uniqueness_constraints,
         }
 
     def without_duplicates(self, other: SchemaNode) -> SchemaNode:
@@ -465,6 +467,7 @@ def to_dict(self) -> dict[str, Any]:
     include_in_menu=False,
     default_filter=None,
     display_labels=["name__value"],
+    uniqueness_constraints=[["name__value", "node"]],
     attributes=[
         SchemaAttribute(
             name="id",
@@ -669,6 +672,7 @@ def to_dict(self) -> dict[str, Any]:
     include_in_menu=False,
     default_filter=None,
     display_labels=["name__value"],
+    uniqueness_constraints=[["name__value", "node"]],
     attributes=[
         SchemaAttribute(
             name="id",
 
@@ -10,6 +10,7 @@
 from infrahub.core.schema.relationship_schema import RelationshipSchema
 from infrahub.core.schema.schema_branch import SchemaBranch
 from infrahub.core.validators import CONSTRAINT_VALIDATOR_MAP
+from infrahub.exceptions import SchemaNotFoundError
 from infrahub.log import get_logger
 
 if TYPE_CHECKING:
@@ -81,7 +82,17 @@ async def _get_constraints_for_one_schema(self, schema: MainSchemaTypes) -> list
 
     async def _get_all_property_constraints(self) -> list[SchemaUpdateConstraintInfo]:
         constraints: list[SchemaUpdateConstraintInfo] = []
-        for schema in self.schema_branch.get_all().values():
+        schemas = list(self.schema_branch.get_all(duplicate=False).values())
+        # SchemaAttribute and SchemaRelationship are appended explicitly so that their uniqueness
+        # constraints are checked as well; a kind missing from the schema branch is skipped.
+        for internal_kind in ("SchemaAttribute", "SchemaRelationship"):
+            try:
+                schemas.append(self.schema_branch.get_node(name=internal_kind, duplicate=False))
+            except SchemaNotFoundError:
+                continue
+        for schema in schemas:
             constraints.extend(await self._get_property_constraints_for_one_schema(schema=schema))
         return constraints