|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +from typing import TYPE_CHECKING, Any, Sequence |
| 4 | + |
| 5 | +from infrahub.core.migrations.shared import MigrationResult |
| 6 | +from infrahub.core.query import Query, QueryType |
| 7 | + |
| 8 | +from ..shared import GraphMigration |
| 9 | + |
| 10 | +if TYPE_CHECKING: |
| 11 | + from infrahub.database import InfrahubDatabase |
| 12 | + |
| 13 | + |
| 14 | +class DeleteDuplicatedAttributesQuery(Query): |
| 15 | + name: str = "delete_duplicated_attributes" |
| 16 | + type: QueryType = QueryType.WRITE |
| 17 | + insert_return: bool = False |
| 18 | + insert_limit: bool = False |
| 19 | + |
| 20 | + async def query_init(self, db: InfrahubDatabase, **kwargs: dict[str, Any]) -> None: # noqa: ARG002 |
| 21 | + query = """ |
| 22 | +// ------------- |
| 23 | +// get all the Nodes linked to multiple Attributes with the same name to drastically reduce the search space |
| 24 | +// ------------- |
| 25 | +MATCH (n:Node)-[:HAS_ATTRIBUTE]->(attr:Attribute) |
| 26 | +WITH DISTINCT n, attr |
| 27 | +WITH n, attr.name AS attr_name, count(*) AS num_attrs |
| 28 | +WHERE num_attrs > 1 |
| 29 | +// ------------- |
| 30 | +// for each Node-attr_name pair, get the possible duplicate Attributes |
| 31 | +// ------------- |
| 32 | +MATCH (n)-[:HAS_ATTRIBUTE]->(dup_attr:Attribute {name: attr_name}) |
| 33 | +WITH DISTINCT n, dup_attr |
| 34 | +// ------------- |
| 35 | +// get the branch(es) for each possible duplicate Attribute |
| 36 | +// ------------- |
| 37 | +CALL (n, dup_attr) { |
| 38 | + MATCH (n)-[r:HAS_ATTRIBUTE {status: "active"}]->(dup_attr) |
| 39 | + WHERE r.to IS NULL |
| 40 | + AND NOT exists((n)-[:HAS_ATTRIBUTE {status: "deleted", branch: r.branch}]->(dup_attr)) |
| 41 | + RETURN r.branch AS branch |
| 42 | +} |
| 43 | +// ------------- |
| 44 | +// get the latest update time for each duplicate Attribute on each branch |
| 45 | +// ------------- |
| 46 | +CALL (dup_attr, branch) { |
| 47 | + MATCH (dup_attr)-[r {branch: branch}]-() |
| 48 | + RETURN max(r.from) AS latest_update |
| 49 | +} |
| 50 | +// ------------- |
| 51 | +// order the duplicate Attributes by latest update time |
| 52 | +// ------------- |
| 53 | +WITH n, dup_attr, branch, latest_update |
| 54 | +ORDER BY n, branch, dup_attr.name, latest_update DESC |
| 55 | +// ------------- |
| 56 | +// for any Node-dup_attr_name pairs with multiple duplicate Attributes, keep the Attribute with the latest update |
| 57 | +// on this branch and delete all the other edges on this branch for this Attribute |
| 58 | +// ------------- |
| 59 | +WITH n, branch, dup_attr.name AS dup_attr_name, collect(dup_attr) AS dup_attrs_reverse_chronological |
| 60 | +WHERE size(dup_attrs_reverse_chronological) > 1 |
| 61 | +WITH branch, tail(dup_attrs_reverse_chronological) AS dup_attrs_to_delete |
| 62 | +UNWIND dup_attrs_to_delete AS dup_attr_to_delete |
| 63 | +MATCH (dup_attr_to_delete)-[r {branch: branch}]-() |
| 64 | +DELETE r |
| 65 | +// ------------- |
| 66 | +// delete any orphaned Attributes |
| 67 | +// ------------- |
| 68 | +WITH DISTINCT dup_attr_to_delete |
| 69 | +WHERE NOT exists((dup_attr_to_delete)--()) |
| 70 | +DELETE dup_attr_to_delete |
| 71 | + """ |
| 72 | + self.add_to_query(query) |
| 73 | + |
| 74 | + |
| 75 | +class Migration040(GraphMigration): |
| 76 | + name: str = "040_duplicated_attributes" |
| 77 | + queries: Sequence[type[Query]] = [DeleteDuplicatedAttributesQuery] |
| 78 | + minimum_version: int = 39 |
| 79 | + |
| 80 | + async def validate_migration(self, db: InfrahubDatabase) -> MigrationResult: # noqa: ARG002 |
| 81 | + return MigrationResult() |
0 commit comments