4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
@@ -1020,15 +1020,15 @@ jobs:
         uses: CodSpeedHQ/action@v3
         with:
           token: ${{ secrets.CODSPEED_TOKEN }}
-          run: "poetry run pytest -v backend/tests/benchmark/ --codspeed"
+          run: "poetry run pytest -vvv backend/tests/benchmark/ --benchmark-verbose --codspeed"
 
       - name: Run non-intensive benchmarks
         if: |
           !contains(github.event.pull_request.labels.*.name, 'ci/run-intensive-benchmarks')
         uses: CodSpeedHQ/action@v3
         with:
           token: ${{ secrets.CODSPEED_TOKEN }}
-          run: "poetry run pytest -v backend/tests/benchmark/ --codspeed --ignore=backend/tests/benchmark/intensive"
+          run: "poetry run pytest -vvv backend/tests/benchmark/ --benchmark-verbose --codspeed --ignore=backend/tests/benchmark/intensive"
 
   # ------------------------------------------ Coverall Report ------------------------------------------
   coverall-report:
13 changes: 7 additions & 6 deletions backend/infrahub/core/constraint/node/runner.py
@@ -2,7 +2,7 @@

 from infrahub.core.branch import Branch
 from infrahub.core.node import Node
-from infrahub.core.node.constraints.interface import NodeConstraintInterface
+from infrahub.core.node.constraints.grouped_uniqueness import NodeGroupedUniquenessConstraint
 from infrahub.core.relationship.constraints.interface import RelationshipManagerConstraintInterface
 from infrahub.database import InfrahubDatabase

@@ -15,25 +15,26 @@ def __init__(
         self,
         db: InfrahubDatabase,
         branch: Branch,
-        node_constraints: list[NodeConstraintInterface],
+        uniqueness_constraint: NodeGroupedUniquenessConstraint,
         relationship_manager_constraints: list[RelationshipManagerConstraintInterface],
     ) -> None:
         self.db = db
         self.branch = branch
-        self.node_constraints = node_constraints
+        self.uniqueness_constraint = uniqueness_constraint
         self.relationship_manager_constraints = relationship_manager_constraints
 
     async def check(self, node: Node, field_filters: list[str] | None = None) -> None:
         async with self.db.start_session() as db:
             await node.resolve_relationships(db=db)
 
-            for node_constraint in self.node_constraints:
-                await node_constraint.check(node, filters=field_filters)
-
             for relationship_name in node.get_schema().relationship_names:
                 if field_filters and relationship_name not in field_filters:
                     continue
                 relationship_manager: RelationshipManager = getattr(node, relationship_name)
+                await relationship_manager.fetch_relationship_ids(db=db, force_refresh=True)
                 for relationship_constraint in self.relationship_manager_constraints:
                     await relationship_constraint.check(relm=relationship_manager, node_schema=node.get_schema())
 
+            # If the HFID constraint is the only constraint violated, all other constraints need to have run before,
+            # as it means there is an existing node that we might want to update in the case of an upsert
+            await self.uniqueness_constraint.check(node, filters=field_filters)
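
The comment on the new check order is the heart of this change: every other constraint runs first, so an HFID-only conflict reliably means the node already exists. Below is a minimal sketch of how a caller might consume that signal in an upsert flow; HFIDViolatedError mirrors the exception added in this PR, while upsert_node, runner, and the return strings are hypothetical stand-ins.

# A minimal sketch, not part of the PR. `HFIDViolatedError` follows the shape
# added in infrahub.exceptions; `runner` is a constraint-runner-like object and
# `upsert_node` is a hypothetical helper.

class HFIDViolatedError(Exception):
    def __init__(self, message: str, matching_nodes_ids: set[str]) -> None:
        super().__init__(message)
        # IDs of existing nodes whose human-friendly ID collides with the new node
        self.matching_nodes_ids = matching_nodes_ids


async def upsert_node(runner, node) -> str:
    try:
        # Raises ValidationError for hard uniqueness conflicts; raises
        # HFIDViolatedError only when the HFID is the sole violated constraint.
        await runner.check(node)
    except HFIDViolatedError as exc:
        # The node already exists: switch from "create" to "update".
        existing_id = next(iter(exc.matching_nodes_ids))
        return f"update {existing_id}"
    return "create"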
(Changes to a schema migration; filename not shown)
@@ -55,13 +55,13 @@ async def execute(self, db: InfrahubDatabase) -> MigrationResult:
             if not isinstance(schema, NodeSchema | GenericSchema):
                 continue
 
-            schema_constraint_path_groups = schema.get_unique_constraint_schema_attribute_paths(
+            uniqueness_constraint_paths = schema.get_unique_constraint_schema_attribute_paths(
                 schema_branch=schema_branch
             )
             includes_optional_attr: bool = False
 
-            for constraint_group in schema_constraint_path_groups:
-                for schema_attribute_path in constraint_group:
+            for uniqueness_constraint_path in uniqueness_constraint_paths:
+                for schema_attribute_path in uniqueness_constraint_path.attributes_paths:

Contributor: I would think that we generally leave migrations alone once they are merged, but not sure if others agree with that.

Contributor (Author): I think code imported by the migration might change as well, so migration behavior might be slightly different anyway.


                 if (
                     schema_attribute_path.attribute_schema
                     and schema_attribute_path.attribute_schema.optional is True
117 changes: 78 additions & 39 deletions backend/infrahub/core/node/constraints/grouped_uniqueness.py
@@ -9,14 +9,19 @@
     SchemaAttributePath,
     SchemaAttributePathValue,
 )
+from infrahub.core.schema.basenode_schema import (
+    SchemaUniquenessConstraintPath,
+    UniquenessConstraintType,
+    UniquenessConstraintViolation,
+)
 from infrahub.core.validators.uniqueness.index import UniquenessQueryResultsIndex
 from infrahub.core.validators.uniqueness.model import (
     NodeUniquenessQueryRequest,
     QueryAttributePath,
     QueryRelationshipAttributePath,
 )
 from infrahub.core.validators.uniqueness.query import NodeUniqueAttributeConstraintQuery
-from infrahub.exceptions import ValidationError
+from infrahub.exceptions import HFIDViolatedError, ValidationError
 
 from .interface import NodeConstraintInterface

@@ -39,15 +44,15 @@ async def _build_query_request(
         self,
         updated_node: Node,
         node_schema: MainSchemaTypes,
-        path_groups: list[list[SchemaAttributePath]],
+        uniqueness_constraint_paths: list[SchemaUniquenessConstraintPath],
         filters: list[str] | None = None,
     ) -> NodeUniquenessQueryRequest:
         query_request = NodeUniquenessQueryRequest(kind=node_schema.kind)
-        for path_group in path_groups:
+        for uniqueness_constraint_path in uniqueness_constraint_paths:
             include_in_query = not filters
             query_relationship_paths: set[QueryRelationshipAttributePath] = set()
             query_attribute_paths: set[QueryAttributePath] = set()
-            for attribute_path in path_group:
+            for attribute_path in uniqueness_constraint_path.attributes_paths:
                 if attribute_path.related_schema and attribute_path.relationship_schema:
                     if filters and attribute_path.relationship_schema.name in filters:
                         include_in_query = True
@@ -118,63 +123,77 @@ async def _get_node_attribute_path_values(
         )
         return node_value_combination
 
-    def _check_one_constraint_group(
-        self, schema_attribute_path_values: list[SchemaAttributePathValue], results_index: UniquenessQueryResultsIndex
-    ) -> None:
-        # constraint cannot be violated if this node is missing any values
-        if any(sapv.value is None for sapv in schema_attribute_path_values):
-            return
-
-        matching_node_ids = results_index.get_node_ids_for_value_group(schema_attribute_path_values)
-        if not matching_node_ids:
-            return
-        uniqueness_constraint_fields = []
-        for sapv in schema_attribute_path_values:
-            if sapv.relationship_schema:
-                uniqueness_constraint_fields.append(sapv.relationship_schema.name)
-            elif sapv.attribute_schema:
-                uniqueness_constraint_fields.append(sapv.attribute_schema.name)
-        uniqueness_constraint_string = "-".join(uniqueness_constraint_fields)
-        error_msg = f"Violates uniqueness constraint '{uniqueness_constraint_string}'"
-        errors = [ValidationError({field_name: error_msg}) for field_name in uniqueness_constraint_fields]
-        raise ValidationError(errors)
-
-    async def _check_results(
+    async def _get_violations(
         self,
         updated_node: Node,
-        path_groups: list[list[SchemaAttributePath]],
+        uniqueness_constraint_paths: list[SchemaUniquenessConstraintPath],
         query_results: Iterable[QueryResult],
-    ) -> None:
+    ) -> list[UniquenessConstraintViolation]:
         results_index = UniquenessQueryResultsIndex(
             query_results=query_results, exclude_node_ids={updated_node.get_id()}
         )
-        for path_group in path_groups:
+        violations = []
+        for uniqueness_constraint_path in uniqueness_constraint_paths:
             # path_group = one constraint (that can contain multiple items)
             schema_attribute_path_values = await self._get_node_attribute_path_values(
-                updated_node=updated_node, path_group=path_group
+                updated_node=updated_node, path_group=uniqueness_constraint_path.attributes_paths
             )
-            self._check_one_constraint_group(
-                schema_attribute_path_values=schema_attribute_path_values, results_index=results_index
-            )
+
+            # constraint cannot be violated if this node is missing any values
+            if any(sapv.value is None for sapv in schema_attribute_path_values):
+                continue
+
+            matching_node_ids = results_index.get_node_ids_for_value_group(schema_attribute_path_values)
+            if not matching_node_ids:
+                continue
+
+            uniqueness_constraint_fields = []
+            for sapv in schema_attribute_path_values:
+                if sapv.relationship_schema:
+                    uniqueness_constraint_fields.append(sapv.relationship_schema.name)
+                elif sapv.attribute_schema:
+                    uniqueness_constraint_fields.append(sapv.attribute_schema.name)
+
+            violations.append(
+                UniquenessConstraintViolation(
+                    nodes_ids=matching_node_ids,
+                    fields=uniqueness_constraint_fields,
+                    typ=uniqueness_constraint_path.typ,
+                )
+            )
+
+        return violations
 
-    async def _check_one_schema(
+    async def _get_single_schema_violations(
         self,
         node: Node,
         node_schema: MainSchemaTypes,
         at: Timestamp | None = None,
         filters: list[str] | None = None,
-    ) -> None:
+    ) -> list[UniquenessConstraintViolation]:
         schema_branch = self.db.schema.get_schema_branch(name=self.branch.name)
-        path_groups = node_schema.get_unique_constraint_schema_attribute_paths(schema_branch=schema_branch)
 
+        uniqueness_constraint_paths = node_schema.get_unique_constraint_schema_attribute_paths(
+            schema_branch=schema_branch
+        )
         query_request = await self._build_query_request(
-            updated_node=node, node_schema=node_schema, path_groups=path_groups, filters=filters
+            updated_node=node,
+            node_schema=node_schema,
+            uniqueness_constraint_paths=uniqueness_constraint_paths,
+            filters=filters,
         )
         if not query_request:
-            return
+            return []
 
         query = await NodeUniqueAttributeConstraintQuery.init(
             db=self.db, branch=self.branch, at=at, query_request=query_request, min_count_required=0
         )
         await query.execute(db=self.db)
-        await self._check_results(updated_node=node, path_groups=path_groups, query_results=query.get_results())
+        return await self._get_violations(
+            updated_node=node,
+            uniqueness_constraint_paths=uniqueness_constraint_paths,
+            query_results=query.get_results(),
+        )
 
     async def check(self, node: Node, at: Timestamp | None = None, filters: list[str] | None = None) -> None:
         def _frozen_constraints(schema: MainSchemaTypes) -> frozenset[frozenset[str]]:
@@ -195,7 +214,27 @@ def _frozen_constraints(schema: MainSchemaTypes) -> frozenset[frozenset[str]]:
             frozen_parent_constraints = _frozen_constraints(parent_schema)
             if frozen_node_constraints <= frozen_parent_constraints:
                 include_node_schema = False
+
             if include_node_schema:
                 schemas_to_check.append(node_schema)
 
+        violations = []
         for schema in schemas_to_check:
-            await self._check_one_schema(node=node, node_schema=schema, at=at, filters=filters)
+            schema_violations = await self._get_single_schema_violations(
+                node=node, node_schema=schema, at=at, filters=filters
+            )
+            violations.extend(schema_violations)
+
+        is_hfid_violated = any(violation.typ == UniquenessConstraintType.HFID for violation in violations)
+
+        for violation in violations:
+            if violation.typ == UniquenessConstraintType.STANDARD or (
+                violation.typ == UniquenessConstraintType.SUBSET_OF_HFID and not is_hfid_violated
+            ):
+                error_msg = f"Violates uniqueness constraint '{'-'.join(violation.fields)}'"
+                raise ValidationError(error_msg)
+
+        for violation in violations:
+            if violation.typ == UniquenessConstraintType.HFID:
+                error_msg = f"Violates uniqueness constraint '{'-'.join(violation.fields)}'"
+                raise HFIDViolatedError(error_msg, matching_nodes_ids=violation.nodes_ids)
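
The tail of check() encodes the precedence rules between the constraint types. Here is a condensed, self-contained restatement of that decision logic (not the PR's code: Violation and resolve are illustrative stand-ins) that can be run directly to see the three outcomes.

from dataclasses import dataclass
from enum import Enum


class ConstraintType(Enum):
    HFID = "HFID"
    SUBSET_OF_HFID = "SUBSET_OF_HFID"
    STANDARD = "STANDARD"


@dataclass
class Violation:  # stand-in for UniquenessConstraintViolation
    fields: list[str]
    typ: ConstraintType


def resolve(violations: list[Violation]) -> str:
    hfid_violated = any(v.typ is ConstraintType.HFID for v in violations)
    for v in violations:
        # A subset of the HFID only fails hard when the full HFID is NOT violated;
        # otherwise the HFID conflict takes precedence and is surfaced as upsertable.
        if v.typ is ConstraintType.STANDARD or (
            v.typ is ConstraintType.SUBSET_OF_HFID and not hfid_violated
        ):
            return f"ValidationError: {'-'.join(v.fields)}"
    if hfid_violated:
        return "HFIDViolatedError (upsert candidate)"
    return "ok"


# The three outcomes:
assert resolve([Violation(["name"], ConstraintType.STANDARD)]).startswith("ValidationError")
assert resolve([Violation(["name", "color"], ConstraintType.HFID)]) == "HFIDViolatedError (upsert candidate)"
assert resolve(
    [Violation(["name"], ConstraintType.SUBSET_OF_HFID), Violation(["name", "color"], ConstraintType.HFID)]
) == "HFIDViolatedError (upsert candidate)"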
80 changes: 63 additions & 17 deletions backend/infrahub/core/schema/basenode_schema.py
@@ -432,25 +432,71 @@ def parse_schema_path(self, path: str, schema: SchemaBranch | None = None) -> SchemaAttributePath:
     def get_unique_constraint_schema_attribute_paths(
         self,
         schema_branch: SchemaBranch,
-        include_unique_attributes: bool = False,
-    ) -> list[list[SchemaAttributePath]]:
-        constraint_paths_groups = []
-        if include_unique_attributes:
-            for attribute_schema in self.unique_attributes:
-                constraint_paths_groups.append(
-                    [SchemaAttributePath(attribute_schema=attribute_schema, attribute_property_name="value")]
-                )
-
-        if not self.uniqueness_constraints:
-            return constraint_paths_groups
+    ) -> list[SchemaUniquenessConstraintPath]:
+        if self.uniqueness_constraints is None:
+            return []
+
+        uniqueness_constraint_paths = []
 
         for uniqueness_path_group in self.uniqueness_constraints:
-            constraint_paths_group = []
-            for uniqueness_path_part in uniqueness_path_group:
-                constraint_paths_group.append(self.parse_schema_path(path=uniqueness_path_part, schema=schema_branch))
-            if constraint_paths_group not in constraint_paths_groups:
-                constraint_paths_groups.append(constraint_paths_group)
-        return constraint_paths_groups
+            attributes_paths = [
+                self.parse_schema_path(path=uniqueness_path_part, schema=schema_branch)
+                for uniqueness_path_part in uniqueness_path_group
+            ]
+            uniqueness_constraint_type = self.get_uniqueness_constraint_type(
+                uniqueness_constraint=set(uniqueness_path_group), schema_branch=schema_branch
+            )
+            uniqueness_constraint_path = SchemaUniquenessConstraintPath(
+                attributes_paths=attributes_paths, typ=uniqueness_constraint_type
+            )
+            uniqueness_constraint_paths.append(uniqueness_constraint_path)
+
+        return uniqueness_constraint_paths
+
+    def convert_hfid_to_uniqueness_constraint(self, schema_branch: SchemaBranch) -> list[str] | None:
+        if self.human_friendly_id is None:
+            return None
+
+        uniqueness_constraint = []
+        for item in self.human_friendly_id:
+            schema_attribute_path = self.parse_schema_path(path=item, schema=schema_branch)
+            if schema_attribute_path.is_type_attribute:
+                uniqueness_constraint.append(item)
+            elif schema_attribute_path.is_type_relationship:
+                uniqueness_constraint.append(schema_attribute_path.relationship_schema.name)
+        return uniqueness_constraint
+
+    def get_uniqueness_constraint_type(
+        self, uniqueness_constraint: set[str], schema_branch: SchemaBranch
+    ) -> UniquenessConstraintType:
+        hfid = self.convert_hfid_to_uniqueness_constraint(schema_branch=schema_branch)
+        if hfid is None:
+            return UniquenessConstraintType.STANDARD
+        hfid_set = set(hfid)
+        if uniqueness_constraint == hfid_set:
+            return UniquenessConstraintType.HFID
+        if uniqueness_constraint <= hfid_set:
+            return UniquenessConstraintType.SUBSET_OF_HFID

Contributor: I would have thought this was not possible. I thought that with your latest changes to how we build the HFID, the uniqueness_constraints always contain the HFID. Do you have an example of when a uniqueness constraint is a subset of the HFID?

Contributor (Author): Yes, but nothing prevents having another uniqueness constraint that is a subset of the HFID. For instance, the schema {"uniqueness_constraints": [["name__value"]], "human_friendly_id": ["name__value", "color__value"]} will contain two uniqueness constraints after schema processing: [["name__value"], ["name__value", "color__value"]]. While checking the ["name__value"] one, we would hit the condition above.


+        return UniquenessConstraintType.STANDARD
+
+
+@dataclass
+class SchemaUniquenessConstraintPath:
+    attributes_paths: list[SchemaAttributePath]
+    typ: UniquenessConstraintType
+
+
+class UniquenessConstraintType(Enum):
+    HFID = "HFID"
+    SUBSET_OF_HFID = "SUBSET_OF_HFID"
+    STANDARD = "STANDARD"
+
+
+@dataclass
+class UniquenessConstraintViolation:
+    nodes_ids: set[str]
+    fields: list[str]
+    typ: UniquenessConstraintType

Contributor: This is out of scope for this PR, but it would be more SOLID to move all of this logic for uniqueness constraints and parsing schema paths into their own separate module(s) instead of putting it all on the BaseNodeSchema. I believe I tried to do this at some point, but did not succeed for a reason that I do not recall.




 @dataclass
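
To make the subset-of-HFID discussion above concrete, here is a self-contained sketch of the classification performed by get_uniqueness_constraint_type, using plain string sets in place of parsed schema paths; the example values follow the schema from the review thread.

from enum import Enum


class UniquenessConstraintType(Enum):
    HFID = "HFID"
    SUBSET_OF_HFID = "SUBSET_OF_HFID"
    STANDARD = "STANDARD"


def constraint_type(constraint: set[str], hfid: set[str] | None) -> UniquenessConstraintType:
    if hfid is None:  # no human_friendly_id on the schema
        return UniquenessConstraintType.STANDARD
    if constraint == hfid:
        return UniquenessConstraintType.HFID
    if constraint <= hfid:  # proper subset, since equality was handled above
        return UniquenessConstraintType.SUBSET_OF_HFID
    return UniquenessConstraintType.STANDARD


# human_friendly_id: ["name__value", "color__value"] yields two constraints after processing
hfid = {"name__value", "color__value"}
assert constraint_type({"name__value", "color__value"}, hfid) is UniquenessConstraintType.HFID
assert constraint_type({"name__value"}, hfid) is UniquenessConstraintType.SUBSET_OF_HFID
assert constraint_type({"serial__value"}, hfid) is UniquenessConstraintType.STANDARD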