-
Couldn't load subscription status.
- Fork 35
fix(backend): performance issues when generating artifacts #7320
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e533ec5
da45814
4b3bc97
3616582
c49ad7a
d53a5d8
06d1d58
b276efb
146c6cf
f9bde88
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,6 +2,9 @@ | |
|
|
||
| from typing import TYPE_CHECKING, Any | ||
|
|
||
| from cachetools import TTLCache | ||
| from cachetools.keys import hashkey | ||
| from cachetools_async import cached | ||
| from git.exc import BadName, GitCommandError | ||
| from infrahub_sdk.exceptions import GraphQLError | ||
| from prefect import task | ||
|
|
@@ -248,12 +251,13 @@ async def sync_from_remote(self, commit: str | None = None) -> None: | |
| await self.update_commit_value(branch_name=self.infrahub_branch_name, commit=commit) | ||
|
|
||
|
|
||
| @task( | ||
| name="Fetch repository commit", | ||
| description="Retrieve a git repository at a given commit, if it does not already exist locally", | ||
| cache_policy=NONE, | ||
| @cached( | ||
| TTLCache(maxsize=100, ttl=30), | ||
| key=lambda *_, **kwargs: hashkey( | ||
| kwargs.get("repository_id"), kwargs.get("name"), kwargs.get("repository_kind"), kwargs.get("commit") | ||
| ), | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Given the TTL it might not have a practical meaning now, but given that commit can be There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The TTL is lower than the git sync scheduled job on purpose because I'd expect this kind of edge case... I think it's fine like that for the moment. |
||
| ) | ||
| async def get_initialized_repo( | ||
| async def _get_initialized_repo( | ||
| client: InfrahubClient, repository_id: str, name: str, repository_kind: str, commit: str | None = None | ||
| ) -> InfrahubReadOnlyRepository | InfrahubRepository: | ||
| if repository_kind == InfrahubKind.REPOSITORY: | ||
|
|
@@ -263,3 +267,16 @@ async def get_initialized_repo( | |
| return await InfrahubReadOnlyRepository.init(id=repository_id, name=name, commit=commit, client=client) | ||
|
|
||
| raise NotImplementedError(f"The repository kind {repository_kind} has not been implemented") | ||
|
|
||
|
|
||
| @task( | ||
| name="Fetch repository commit", | ||
| description="Retrieve a git repository at a given commit, if it does not already exist locally", | ||
| cache_policy=NONE, | ||
| ) | ||
| async def get_initialized_repo( | ||
| client: InfrahubClient, repository_id: str, name: str, repository_kind: str, commit: str | None = None | ||
| ) -> InfrahubReadOnlyRepository | InfrahubRepository: | ||
| return await _get_initialized_repo( | ||
| client=client, repository_id=repository_id, name=name, repository_kind=repository_kind, commit=commit | ||
| ) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,9 +1,16 @@ | ||
| from typing import TYPE_CHECKING | ||
| from collections import defaultdict | ||
| from typing import TYPE_CHECKING, Any | ||
|
|
||
| from infrahub_sdk import InfrahubClient | ||
| from infrahub_sdk.node import RelationshipManager | ||
| from infrahub_sdk.protocols import CoreArtifactDefinition, CoreCheckDefinition, CoreGroup | ||
| from infrahub_sdk.types import Order | ||
|
|
||
| from infrahub.core import registry | ||
| from infrahub.core.constants import InfrahubKind | ||
| from infrahub.core.manager import NodeManager | ||
| from infrahub.database import InfrahubDatabase | ||
| from infrahub.generators.models import ProposedChangeGeneratorDefinition | ||
|
|
||
| from .models import RepositoryBranchInfo, RepositoryData | ||
|
|
||
|
|
@@ -46,3 +53,118 @@ async def get_repositories_commit_per_branch( | |
| ) | ||
|
|
||
| return repositories | ||
|
|
||
|
|
||
| def _collect_parameter_first_segments(params: Any) -> set[str]: | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we need an There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think LLM used |
||
| segments: set[str] = set() | ||
|
|
||
| def _walk(value: Any) -> None: | ||
| if isinstance(value, str): | ||
| segment = value.split("__", 1)[0] | ||
| if segment: | ||
| segments.add(segment) | ||
| elif isinstance(value, dict): | ||
| for nested in value.values(): | ||
| _walk(nested) | ||
| elif isinstance(value, (list, tuple, set)): | ||
| for nested in value: | ||
| _walk(nested) | ||
|
|
||
| _walk(params) | ||
| return segments | ||
|
Comment on lines
+58
to
+74
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🛠️ Refactor suggestion | 🟠 Major Add docstrings to new utility functions. All three new functions ( As per coding guidelines, Python functions should include Google-style docstrings with:
Example for async def fetch_definition_targets(
client: InfrahubClient,
branch: str,
definition: CoreArtifactDefinition | CoreCheckDefinition | ProposedChangeGeneratorDefinition,
) -> CoreGroup:
"""Fetch target group for a definition with prefetched member nodes.
Retrieves the target group associated with the given artifact, check, or generator
definition, and prefetches member nodes based on the parameters defined in the
definition.
Args:
client: The Infrahub SDK client for API interactions
branch: The branch name to fetch the group from
definition: The definition object containing targets and parameters
Returns:
The target group with prefetched member nodes populated in the client store
"""Also applies to: 77-122, 124-155 🤖 Prompt for AI AgentsThere was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hey coderabbit, where is this requirement for docstring coming from? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: Length of output: 32967 @ogenstad The requirement for docstrings is not enforced by your linter configuration. Looking at your ignore = [
"D", # pydocstyle
"DOC", # pydoclint
...
]Both pydocstyle ( However, I noticed that existing functions in the codebase often include docstrings. For example, the async def get_repositories_commit_per_branch(
db: InfrahubDatabase,
) -> dict[str, RepositoryData]:
"""Get a list of all repositories and their commit on each branches.
This method is similar to 'get_list_repositories' method in the Python SDK.
NOTE: At some point, we should refactor this function to use a single Database query instead of one per branch
"""My suggestion was based on this existing pattern in the codebase as a best practice, not a hard requirement from your configuration. If you'd prefer not to receive docstring suggestions for this project, I can remember that for future reviews. 🧬 Code Graph Analysis Results
|
||
|
|
||
|
|
||
| async def _prefetch_group_member_nodes( | ||
| client: InfrahubClient, | ||
| members: RelationshipManager, | ||
| branch: str, | ||
| required_fields: set[str], | ||
| ) -> None: | ||
| ids_per_kind: dict[str, set[str]] = defaultdict(set) | ||
|
Comment on lines
+77
to
+83
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🛠️ Refactor suggestion | 🟠 Major Add Google‑style docstrings to new utilities. Per repo guidelines, add docstrings to _prefetch_group_member_nodes, fetch_definition_targets, and public fetch* helpers. Minimal examples: async def _prefetch_group_member_nodes(...):
+ """Batch‑prefetch group member nodes for required fields.
+
+ Args:
+ client: SDK client.
+ members: Relationship manager for group.members.
+ branch: Branch to query.
+ required_fields: Top‑level attribute/relationship names needed.
+
+ Returns:
+ None. Populates the client store with fetched nodes.
+ """
@@
async def _fetch_definition_targets(...)-> CoreGroup:
+ """Fetch target group and prefetch members based on definition parameters."""
@@
async def fetch_artifact_definition_targets(...)-> CoreGroup:
+ """Public helper to resolve targets for an artifact definition."""
@@
async def fetch_check_definition_targets(...)-> CoreGroup:
+ """Public helper to resolve targets for a check definition."""
@@
async def fetch_proposed_change_generator_definition_targets(...)-> CoreGroup:
+ """Public helper to resolve targets for a proposed change generator definition."""Also applies to: 119-141, 143-170 |
||
| for peer in members.peers: | ||
| if peer.id and peer.typename: | ||
| ids_per_kind[peer.typename].add(peer.id) | ||
|
|
||
|
Comment on lines
+84
to
+87
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Wrong entity from RelationshipManager.peers; use relationship.peer. peers yields Relationship objects. Access the node via relationship.peer; otherwise peer.typename may not exist and ids_per_kind stays empty. - for peer in members.peers:
- if peer.id and peer.typename:
- ids_per_kind[peer.typename].add(peer.id)
+ for relationship in members.peers:
+ node = relationship.peer
+ if node.id and node.typename:
+ ids_per_kind[node.typename].add(node.id)🤖 Prompt for AI Agents |
||
| if not ids_per_kind: | ||
| return | ||
|
|
||
| batch = await client.create_batch() | ||
|
|
||
| for kind, ids in ids_per_kind.items(): | ||
| schema = await client.schema.get(kind=kind, branch=branch) | ||
|
|
||
| # FIXME: https://github.com/opsmill/infrahub-sdk-python/pull/205 | ||
| valid_fields = set(schema.attribute_names) | set(schema.relationship_names) | ||
| keep_relationships = set(schema.relationship_names) & required_fields | ||
| cleaned_fields = valid_fields - required_fields | ||
|
|
||
| kwargs: dict[str, Any] = { | ||
| "kind": kind, | ||
| "ids": list(ids), | ||
| "branch": branch, | ||
| "exclude": list(cleaned_fields), | ||
| "populate_store": True, | ||
| "order": Order(disable=True), | ||
| } | ||
|
|
||
| if keep_relationships: | ||
| kwargs["include"] = list(keep_relationships) | ||
|
|
||
| batch.add(task=client.filters, **kwargs) | ||
|
|
||
| async for _ in batch.execute(): | ||
| pass | ||
|
|
||
|
|
||
| async def _fetch_definition_targets( | ||
| client: InfrahubClient, | ||
| branch: str, | ||
| group_id: str, | ||
| parameters: Any, | ||
| ) -> CoreGroup: | ||
| group = await client.get( | ||
| kind=CoreGroup, | ||
| id=group_id, | ||
| branch=branch, | ||
| include=["members"], | ||
| ) | ||
|
|
||
| parameter_fields = _collect_parameter_first_segments(parameters) | ||
| await _prefetch_group_member_nodes( | ||
| client=client, | ||
| members=group.members, | ||
| branch=branch, | ||
| required_fields=parameter_fields, | ||
| ) | ||
|
|
||
| return group | ||
|
|
||
|
|
||
| async def fetch_artifact_definition_targets( | ||
| client: InfrahubClient, | ||
| branch: str, | ||
| definition: CoreArtifactDefinition, | ||
| ) -> CoreGroup: | ||
| return await _fetch_definition_targets( | ||
| client=client, branch=branch, group_id=definition.targets.id, parameters=definition.parameters.value | ||
| ) | ||
|
|
||
|
|
||
| async def fetch_check_definition_targets( | ||
| client: InfrahubClient, | ||
| branch: str, | ||
| definition: CoreCheckDefinition, | ||
| ) -> CoreGroup: | ||
| return await _fetch_definition_targets( | ||
| client=client, branch=branch, group_id=definition.targets.id, parameters=definition.parameters.value | ||
| ) | ||
|
|
||
|
|
||
| async def fetch_proposed_change_generator_definition_targets( | ||
| client: InfrahubClient, | ||
| branch: str, | ||
| definition: ProposedChangeGeneratorDefinition, | ||
| ) -> CoreGroup: | ||
| return await _fetch_definition_targets( | ||
| client=client, branch=branch, group_id=definition.group_id, parameters=definition.parameters | ||
| ) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not a blocker for this PR but I'm wondering if we could leverage prefect for the caching ? and what is the limitation if you already tried
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It was done using the Prefect caching mechanism in the first iteration, but as described in the following commit: 02b289d
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Basically Prefect caching is useful for distributed caching but here we want a local, efficient cache.