From 0334f946d50c586fb7481dbaf3b46563067d9a9c Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Mon, 6 Oct 2025 11:57:27 +0200 Subject: [PATCH 1/6] wip --- src/aiida/tools/_dumping/detect.py | 30 ++++++++++++----------- src/aiida/tools/_dumping/engine.py | 38 ++++++++++++++--------------- src/aiida/tools/_dumping/mapping.py | 19 ++++++++++----- 3 files changed, 47 insertions(+), 40 deletions(-) diff --git a/src/aiida/tools/_dumping/detect.py b/src/aiida/tools/_dumping/detect.py index 9e2d14ab04..6f9366674d 100644 --- a/src/aiida/tools/_dumping/detect.py +++ b/src/aiida/tools/_dumping/detect.py @@ -83,15 +83,9 @@ def get_nodes( ignore_time_filters: bool = False, exclude_tracked: bool = True, ) -> ProcessingQueue: - """Unified method to get nodes with various filtering options. + """Unified method to get nodes with various filtering options.""" + from aiida.common.progress_reporter import get_progress_reporter, set_progress_bar_tqdm - :param group_scope: Determines the query scope (ANY, IN_GROUP, NO_GROUP) - :param group: The specific group to filter by when scope is IN_GROUP - :param apply_filters: Whether to apply top-level and caller filters - :param ignore_time_filters: Whether to ignore time-based filters - :param exclude_tracked: Whether to exclude nodes already in dump tracker - :return: ProcessingQueue containing the filtered nodes - """ if group_scope == GroupDumpScope.IN_GROUP and not group: msg = 'Scope is IN_GROUP but no group object was provided.' raise ValueError(msg) @@ -104,10 +98,15 @@ def get_nodes( # Get nodes by type processing_queue = ProcessingQueue() + set_progress_bar_tqdm() + # Process calculations - calc_nodes = self._query_single_type( - orm_type=orm.CalculationNode, group_scope=group_scope, group=group, base_filters=base_filters - ) + with get_progress_reporter()(desc='Querying calculations from database', total=0) as progress: + calc_nodes = self._query_single_type( + orm_type=orm.CalculationNode, group_scope=group_scope, group=group, base_filters=base_filters + ) + progress.update(1) + if exclude_tracked: calc_nodes = self._exclude_tracked_nodes(calc_nodes, 'calculations') if apply_filters: @@ -115,9 +114,12 @@ def get_nodes( processing_queue.calculations = calc_nodes # Process workflows - workflow_nodes = self._query_single_type( - orm_type=orm.WorkflowNode, group_scope=group_scope, group=group, base_filters=base_filters - ) + with get_progress_reporter()(desc='Querying workflows from database', total=0) as progress: + workflow_nodes = self._query_single_type( + orm_type=orm.WorkflowNode, group_scope=group_scope, group=group, base_filters=base_filters + ) + progress.update(1) + if exclude_tracked: workflow_nodes = self._exclude_tracked_nodes(workflow_nodes, 'workflows') if apply_filters: diff --git a/src/aiida/tools/_dumping/engine.py b/src/aiida/tools/_dumping/engine.py index c55910a2f5..21aa30dba0 100644 --- a/src/aiida/tools/_dumping/engine.py +++ b/src/aiida/tools/_dumping/engine.py @@ -78,26 +78,21 @@ def __init__( def _build_mapping_for_target(self) -> GroupNodeMapping: """Build the appropriate group-node mapping based on the target entity and config.""" if isinstance(self.dump_target_entity, orm.Group): - # Single group dump - pass it as a single-element list + logger.report('Building group-node mapping for single group...') return GroupNodeMapping.build_from_db(groups=[self.dump_target_entity]) elif isinstance(self.dump_target_entity, Profile): - # Profile dump - depends on config assert isinstance(self.config, ProfileDumpConfig) if self.config.all_entries: - # Build mapping for all groups + logger.report('Building group-node mapping for all groups in profile...') return GroupNodeMapping.build_from_db(groups=None) elif self.config.groups: - # Build mapping only for specified groups + logger.report(f'Building group-node mapping for {len(self.config.groups)} specified groups...') return GroupNodeMapping.build_from_db(groups=self.config.groups) else: - # No groups specified - return empty mapping return GroupNodeMapping() - else: - return GroupNodeMapping() - def _log_dump_start(self) -> None: """Log the start of a dump operation.""" @@ -219,17 +214,28 @@ def _dump_profile(self) -> None: and not self.config.filters_set and not self.config.dump_mode == DumpMode.DRY_RUN ): - # NOTE: Hack for now, delete empty directory again. - # Ideally don't even create in the first place. - # Need to check again where it is actually created. DumpPaths._safe_delete_directory(path=self.dump_paths.base_output_path) return None self.dump_tracker.set_current_mapping(self.current_mapping) + + logger.report('Detecting changes since last dump. This may take a while for large databases...') node_changes = self.detector._detect_node_changes() + msg = ( + f'Detected {len(node_changes.new_or_modified)} new/modified nodes ' + 'and {len(node_changes.deleted)} deleted nodes.' + ) + logger.report(msg) + group_changes = self.detector._detect_group_changes( previous_mapping=self.dump_tracker.previous_mapping, current_mapping=self.current_mapping ) + msg = ( + f'Detected {len(group_changes.new)} new, {len(group_changes.modified)} modified, ' + f'and {len(group_changes.deleted)} deleted groups.' + ) + logger.report(msg) + all_changes = DumpChanges(nodes=node_changes, groups=group_changes) if all_changes.is_empty(): @@ -250,12 +256,4 @@ def _dump_profile(self) -> None: ) deletion_manager._handle_deleted_entities() - profile_dump_executor = ProfileDumpExecutor( - config=self.config, - dump_paths=self.dump_paths, - dump_tracker=self.dump_tracker, - process_dump_executor=self.process_dump_executor, - detector=self.detector, - current_mapping=self.current_mapping, - ) - profile_dump_executor.dump(changes=all_changes) + logger.report('Processing group changes.') diff --git a/src/aiida/tools/_dumping/mapping.py b/src/aiida/tools/_dumping/mapping.py index 3dfe956d89..97320a96d9 100644 --- a/src/aiida/tools/_dumping/mapping.py +++ b/src/aiida/tools/_dumping/mapping.py @@ -16,6 +16,10 @@ from aiida import orm from aiida.tools._dumping.utils import GroupChanges, GroupInfo, GroupModificationInfo, NodeMembershipChange +from aiida.common.log import AIIDA_LOGGER + +LOGGER = AIIDA_LOGGER.getChild('tools._dumping.mapping') + @dataclass class GroupNodeMapping: @@ -70,12 +74,8 @@ def from_dict(cls, data: Dict) -> 'GroupNodeMapping': @classmethod def build_from_db(cls, groups: Optional[Union[List[orm.Group], List[str], List[int]]] = None) -> 'GroupNodeMapping': - """Build a mapping from the current database state. + """Build a mapping from the current database state.""" - :param groups: If provided, only build mapping for these specific groups. - If None, build mapping for all groups. - :return: Populated ``GroupNodeMapping`` instance - """ mapping = cls() # Query all groups and their nodes, or just the specific groups @@ -89,15 +89,22 @@ def build_from_db(cls, groups: Optional[Union[List[orm.Group], List[str], List[i else: group_uuids = [orm.load_group(g).uuid for g in groups] qb.append(orm.Group, tag='group', project=['uuid'], filters={'uuid': {'in': group_uuids}}) + LOGGER.report(f'Querying node memberships for {len(group_uuids)} groups...') else: # Query all groups qb.append(orm.Group, tag='group', project=['uuid']) + LOGGER.report('Querying node memberships for all groups in profile...') qb.append(orm.Node, with_group='group', project=['uuid']) - for group_uuid, node_uuid in qb.all(): + LOGGER.report('Retrieving group-node relationships from database...') + results = qb.all() + LOGGER.report(f'Processing {len(results)} group-node relationships...') + + for group_uuid, node_uuid in results: mapping._add_node_to_group(group_uuid, node_uuid) + LOGGER.report('Group-node mapping completed.') return mapping def diff(self, other: 'GroupNodeMapping') -> GroupChanges: From d5e148e4154e0191e039a3411875b606bafe8ff2 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Mon, 6 Oct 2025 14:49:20 +0200 Subject: [PATCH 2/6] more logging --- src/aiida/tools/_dumping/detect.py | 68 +++++++++++++------ src/aiida/tools/_dumping/engine.py | 21 +++++- .../tools/_dumping/executors/collection.py | 2 +- 3 files changed, 68 insertions(+), 23 deletions(-) diff --git a/src/aiida/tools/_dumping/detect.py b/src/aiida/tools/_dumping/detect.py index 6f9366674d..7cf636023a 100644 --- a/src/aiida/tools/_dumping/detect.py +++ b/src/aiida/tools/_dumping/detect.py @@ -15,6 +15,7 @@ from aiida import orm from aiida.common import AIIDA_LOGGER +from aiida.common.progress_reporter import get_progress_reporter, set_progress_bar_tqdm from aiida.tools._dumping.config import GroupDumpConfig, GroupDumpScope, ProfileDumpConfig from aiida.tools._dumping.mapping import GroupNodeMapping from aiida.tools._dumping.utils import ( @@ -83,8 +84,15 @@ def get_nodes( ignore_time_filters: bool = False, exclude_tracked: bool = True, ) -> ProcessingQueue: - """Unified method to get nodes with various filtering options.""" - from aiida.common.progress_reporter import get_progress_reporter, set_progress_bar_tqdm + """Unified method to get nodes with various filtering options. + + :param group_scope: Determines the query scope (ANY, IN_GROUP, NO_GROUP) + :param group: The specific group to filter by when scope is IN_GROUP + :param apply_filters: Whether to apply top-level and caller filters + :param ignore_time_filters: Whether to ignore time-based filters + :param exclude_tracked: Whether to exclude nodes already in dump tracker + :return: ProcessingQueue containing the filtered nodes + """ if group_scope == GroupDumpScope.IN_GROUP and not group: msg = 'Scope is IN_GROUP but no group object was provided.' @@ -98,15 +106,14 @@ def get_nodes( # Get nodes by type processing_queue = ProcessingQueue() - set_progress_bar_tqdm() - # Process calculations - with get_progress_reporter()(desc='Querying calculations from database', total=0) as progress: - calc_nodes = self._query_single_type( - orm_type=orm.CalculationNode, group_scope=group_scope, group=group, base_filters=base_filters - ) - progress.update(1) + logger.report('Querying calculation nodes from database...') + calc_nodes = self._query_single_type( + orm_type=orm.CalculationNode, group_scope=group_scope, group=group, base_filters=base_filters + ) + logger.report(f'Retrieved {len(calc_nodes)} calculation nodes.') + # TODO: this also takes a while if exclude_tracked: calc_nodes = self._exclude_tracked_nodes(calc_nodes, 'calculations') if apply_filters: @@ -114,11 +121,11 @@ def get_nodes( processing_queue.calculations = calc_nodes # Process workflows - with get_progress_reporter()(desc='Querying workflows from database', total=0) as progress: - workflow_nodes = self._query_single_type( - orm_type=orm.WorkflowNode, group_scope=group_scope, group=group, base_filters=base_filters - ) - progress.update(1) + logger.report('Querying workflow nodes from database...') + workflow_nodes = self._query_single_type( + orm_type=orm.WorkflowNode, group_scope=group_scope, group=group, base_filters=base_filters + ) + logger.report(f'Retrieved {len(workflow_nodes)} workflow nodes.') if exclude_tracked: workflow_nodes = self._exclude_tracked_nodes(workflow_nodes, 'workflows') @@ -182,6 +189,7 @@ def _exclude_tracked_nodes(self, nodes: list[orm.ProcessNode], store_type: str) :param store_type: Target store (calculations or workflows) :return: List of initial nodes with already tracked (dumped) nodes removed """ + # TODO: add logging here too if not nodes: return nodes @@ -192,7 +200,19 @@ def _exclude_tracked_nodes(self, nodes: list[orm.ProcessNode], store_type: str) if not tracked_uuids: return nodes - return [node for node in nodes if node.uuid not in tracked_uuids] + return_nodes = [] + set_progress_bar_tqdm() + + with get_progress_reporter()(desc=f'Excluding tracked {store_type}...', total=len(nodes)) as progress: + for node in nodes: + if node.uuid not in tracked_uuids: + return_nodes.append(node) + + progress.update() + + logger.report(f'Applyied exclusion of tracked {store_type}.') + + return return_nodes except ValueError as e: logger.error(f"Error getting registry for '{store_type}': {e}") @@ -205,6 +225,7 @@ def _apply_behavioral_filters(self, nodes: list[orm.ProcessNode], store_type: st :param store_type: Target store (calculations or workflows) :return: Filtered list of nodes, with top-level and group membership filters applied """ + if not nodes: return nodes @@ -219,12 +240,19 @@ def _apply_behavioral_filters(self, nodes: list[orm.ProcessNode], store_type: st # Apply caller filter (keep top-level or explicitly grouped) filtered_nodes = [] - for node in nodes: - is_sub_node = bool(getattr(node, 'caller', None)) - is_explicitly_grouped = node.uuid in self.grouped_node_uuids + set_progress_bar_tqdm() + + with get_progress_reporter()(desc=f'Applying filters to {store_type}...', total=len(nodes)) as progress: + for node in nodes: + is_sub_node = bool(getattr(node, 'caller', None)) + is_explicitly_grouped = node.uuid in self.grouped_node_uuids + + if not is_sub_node or is_explicitly_grouped: + filtered_nodes.append(node) + + progress.update() - if not is_sub_node or is_explicitly_grouped: - filtered_nodes.append(node) + logger.report(f'Applied behavioral filters to {store_type}.') return filtered_nodes diff --git a/src/aiida/tools/_dumping/engine.py b/src/aiida/tools/_dumping/engine.py index 21aa30dba0..e4fe517167 100644 --- a/src/aiida/tools/_dumping/engine.py +++ b/src/aiida/tools/_dumping/engine.py @@ -78,10 +78,11 @@ def __init__( def _build_mapping_for_target(self) -> GroupNodeMapping: """Build the appropriate group-node mapping based on the target entity and config.""" if isinstance(self.dump_target_entity, orm.Group): - logger.report('Building group-node mapping for single group...') + logger.report(f'Building group-node mapping for single group: <{self.dump_target_entity.label}> ...') return GroupNodeMapping.build_from_db(groups=[self.dump_target_entity]) elif isinstance(self.dump_target_entity, Profile): + # Profile dump - depends on config assert isinstance(self.config, ProfileDumpConfig) if self.config.all_entries: @@ -93,6 +94,9 @@ def _build_mapping_for_target(self) -> GroupNodeMapping: else: return GroupNodeMapping() + else: + return GroupNodeMapping() + def _log_dump_start(self) -> None: """Log the start of a dump operation.""" @@ -214,12 +218,16 @@ def _dump_profile(self) -> None: and not self.config.filters_set and not self.config.dump_mode == DumpMode.DRY_RUN ): + # NOTE: Hack for now, delete empty directory again. + # Ideally don't even create in the first place. + # Need to check again where it is actually created. DumpPaths._safe_delete_directory(path=self.dump_paths.base_output_path) return None self.dump_tracker.set_current_mapping(self.current_mapping) logger.report('Detecting changes since last dump. This may take a while for large databases...') + logger.report('Detecting node changes...') node_changes = self.detector._detect_node_changes() msg = ( f'Detected {len(node_changes.new_or_modified)} new/modified nodes ' @@ -227,6 +235,7 @@ def _dump_profile(self) -> None: ) logger.report(msg) + logger.report('Detecting group changes...') group_changes = self.detector._detect_group_changes( previous_mapping=self.dump_tracker.previous_mapping, current_mapping=self.current_mapping ) @@ -256,4 +265,12 @@ def _dump_profile(self) -> None: ) deletion_manager._handle_deleted_entities() - logger.report('Processing group changes.') + profile_dump_executor = ProfileDumpExecutor( + config=self.config, + dump_paths=self.dump_paths, + dump_tracker=self.dump_tracker, + process_dump_executor=self.process_dump_executor, + detector=self.detector, + current_mapping=self.current_mapping, + ) + profile_dump_executor.dump(changes=all_changes) diff --git a/src/aiida/tools/_dumping/executors/collection.py b/src/aiida/tools/_dumping/executors/collection.py index 363fdc2e55..8b2353eaf3 100644 --- a/src/aiida/tools/_dumping/executors/collection.py +++ b/src/aiida/tools/_dumping/executors/collection.py @@ -211,7 +211,7 @@ def _handle_group_changes(self, group_changes: GroupChanges) -> None: :param group_changes: Populated ``GroupChanges`` object """ - logger.report('Processing group changes.') + logger.report('Processing group changes...') # Handle Deleted Groups. Actual directory deletion handled by DeletionExecutor, only logging done here. if group_changes.deleted: From 3a506939e4e17f82098eafae3cfe9c8bdd4b327b Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Mon, 6 Oct 2025 15:01:50 +0200 Subject: [PATCH 3/6] fix pre-commit --- src/aiida/tools/_dumping/mapping.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/aiida/tools/_dumping/mapping.py b/src/aiida/tools/_dumping/mapping.py index 97320a96d9..20f51ff117 100644 --- a/src/aiida/tools/_dumping/mapping.py +++ b/src/aiida/tools/_dumping/mapping.py @@ -14,9 +14,8 @@ from typing import Dict, List, Optional, Set, Union, cast from aiida import orm -from aiida.tools._dumping.utils import GroupChanges, GroupInfo, GroupModificationInfo, NodeMembershipChange - from aiida.common.log import AIIDA_LOGGER +from aiida.tools._dumping.utils import GroupChanges, GroupInfo, GroupModificationInfo, NodeMembershipChange LOGGER = AIIDA_LOGGER.getChild('tools._dumping.mapping') From e969c9625f8caa2171b993a7926165376bd9adf4 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Mon, 6 Oct 2025 15:07:28 +0200 Subject: [PATCH 4/6] wip --- src/aiida/tools/_dumping/detect.py | 9 +++++---- src/aiida/tools/_dumping/engine.py | 1 + 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/aiida/tools/_dumping/detect.py b/src/aiida/tools/_dumping/detect.py index 7cf636023a..2901d2db6f 100644 --- a/src/aiida/tools/_dumping/detect.py +++ b/src/aiida/tools/_dumping/detect.py @@ -113,7 +113,6 @@ def get_nodes( ) logger.report(f'Retrieved {len(calc_nodes)} calculation nodes.') - # TODO: this also takes a while if exclude_tracked: calc_nodes = self._exclude_tracked_nodes(calc_nodes, 'calculations') if apply_filters: @@ -203,14 +202,16 @@ def _exclude_tracked_nodes(self, nodes: list[orm.ProcessNode], store_type: str) return_nodes = [] set_progress_bar_tqdm() - with get_progress_reporter()(desc=f'Excluding tracked {store_type}...', total=len(nodes)) as progress: + with get_progress_reporter()( + desc=f'Excluding already dumped {store_type}...', total=len(nodes) + ) as progress: for node in nodes: if node.uuid not in tracked_uuids: return_nodes.append(node) progress.update() - logger.report(f'Applyied exclusion of tracked {store_type}.') + logger.report(f'Applied exclusion of previously dumped {store_type}.') return return_nodes @@ -252,7 +253,7 @@ def _apply_behavioral_filters(self, nodes: list[orm.ProcessNode], store_type: st progress.update() - logger.report(f'Applied behavioral filters to {store_type}.') + logger.report(f'Applied relevant filters to {store_type}.') return filtered_nodes diff --git a/src/aiida/tools/_dumping/engine.py b/src/aiida/tools/_dumping/engine.py index e4fe517167..8626f00023 100644 --- a/src/aiida/tools/_dumping/engine.py +++ b/src/aiida/tools/_dumping/engine.py @@ -227,6 +227,7 @@ def _dump_profile(self) -> None: self.dump_tracker.set_current_mapping(self.current_mapping) logger.report('Detecting changes since last dump. This may take a while for large databases...') + logger.report('Detecting node changes...') node_changes = self.detector._detect_node_changes() msg = ( From 6d5469cd86c8c871c3ded59df67cb617d26357d5 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Mon, 6 Oct 2025 15:19:28 +0200 Subject: [PATCH 5/6] final edits --- src/aiida/tools/_dumping/detect.py | 1 - src/aiida/tools/_dumping/engine.py | 6 +++--- src/aiida/tools/_dumping/mapping.py | 9 +++++++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/aiida/tools/_dumping/detect.py b/src/aiida/tools/_dumping/detect.py index 2901d2db6f..e5bb2fa8ba 100644 --- a/src/aiida/tools/_dumping/detect.py +++ b/src/aiida/tools/_dumping/detect.py @@ -188,7 +188,6 @@ def _exclude_tracked_nodes(self, nodes: list[orm.ProcessNode], store_type: str) :param store_type: Target store (calculations or workflows) :return: List of initial nodes with already tracked (dumped) nodes removed """ - # TODO: add logging here too if not nodes: return nodes diff --git a/src/aiida/tools/_dumping/engine.py b/src/aiida/tools/_dumping/engine.py index 8626f00023..ae482005ef 100644 --- a/src/aiida/tools/_dumping/engine.py +++ b/src/aiida/tools/_dumping/engine.py @@ -78,7 +78,7 @@ def __init__( def _build_mapping_for_target(self) -> GroupNodeMapping: """Build the appropriate group-node mapping based on the target entity and config.""" if isinstance(self.dump_target_entity, orm.Group): - logger.report(f'Building group-node mapping for single group: <{self.dump_target_entity.label}> ...') + logger.report(f'Building group-node mapping for single group `{self.dump_target_entity.label}`...') return GroupNodeMapping.build_from_db(groups=[self.dump_target_entity]) elif isinstance(self.dump_target_entity, Profile): @@ -108,7 +108,7 @@ def _log_dump_start(self) -> None: elif isinstance(self.dump_target_entity, Profile): dump_start_report = f'profile `{self.dump_target_entity.name}`' - msg = f'Starting dump of {dump_start_report} in {self.config.dump_mode.name.lower()} mode.' + msg = f'Starting dump of {dump_start_report} in {self.config.dump_mode.name.lower()} mode...' if self.config.dump_mode != DumpMode.DRY_RUN: logger.report(msg) @@ -232,7 +232,7 @@ def _dump_profile(self) -> None: node_changes = self.detector._detect_node_changes() msg = ( f'Detected {len(node_changes.new_or_modified)} new/modified nodes ' - 'and {len(node_changes.deleted)} deleted nodes.' + f'and {len(node_changes.deleted)} deleted nodes.' ) logger.report(msg) diff --git a/src/aiida/tools/_dumping/mapping.py b/src/aiida/tools/_dumping/mapping.py index 20f51ff117..d745ac9c91 100644 --- a/src/aiida/tools/_dumping/mapping.py +++ b/src/aiida/tools/_dumping/mapping.py @@ -73,7 +73,12 @@ def from_dict(cls, data: Dict) -> 'GroupNodeMapping': @classmethod def build_from_db(cls, groups: Optional[Union[List[orm.Group], List[str], List[int]]] = None) -> 'GroupNodeMapping': - """Build a mapping from the current database state.""" + """Build a mapping from the current database state. + + :param groups: If provided, only build mapping for these specific groups. + If None, build mapping for all groups. + :return: Populated ``GroupNodeMapping`` instance + """ mapping = cls() @@ -103,7 +108,7 @@ def build_from_db(cls, groups: Optional[Union[List[orm.Group], List[str], List[i for group_uuid, node_uuid in results: mapping._add_node_to_group(group_uuid, node_uuid) - LOGGER.report('Group-node mapping completed.') + LOGGER.report('Completed group-node mapping.') return mapping def diff(self, other: 'GroupNodeMapping') -> GroupChanges: From 87058a9e3fc7600a9595e593dfeb4cfedec3e9b0 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Thu, 16 Oct 2025 09:41:29 +0200 Subject: [PATCH 6/6] custom progress bar format with wider description and click "Report" prefix --- src/aiida/tools/_dumping/detect.py | 15 +++++++++------ src/aiida/tools/_dumping/executors/collection.py | 10 ++++++---- src/aiida/tools/_dumping/mapping.py | 2 +- src/aiida/tools/_dumping/utils.py | 8 ++++++-- 4 files changed, 22 insertions(+), 13 deletions(-) diff --git a/src/aiida/tools/_dumping/detect.py b/src/aiida/tools/_dumping/detect.py index e5bb2fa8ba..b320af38a4 100644 --- a/src/aiida/tools/_dumping/detect.py +++ b/src/aiida/tools/_dumping/detect.py @@ -13,12 +13,15 @@ from datetime import datetime, timedelta from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, Union, cast +import click + from aiida import orm from aiida.common import AIIDA_LOGGER from aiida.common.progress_reporter import get_progress_reporter, set_progress_bar_tqdm from aiida.tools._dumping.config import GroupDumpConfig, GroupDumpScope, ProfileDumpConfig from aiida.tools._dumping.mapping import GroupNodeMapping from aiida.tools._dumping.utils import ( + DUMP_PROGRESS_BAR_FORMAT, REGISTRY_TO_ORM_TYPE, DumpPaths, DumpTimes, @@ -199,11 +202,10 @@ def _exclude_tracked_nodes(self, nodes: list[orm.ProcessNode], store_type: str) return nodes return_nodes = [] - set_progress_bar_tqdm() + set_progress_bar_tqdm(bar_format=DUMP_PROGRESS_BAR_FORMAT) - with get_progress_reporter()( - desc=f'Excluding already dumped {store_type}...', total=len(nodes) - ) as progress: + progress_desc = f"{click.style('Report', fg='blue', bold=True)}: Excluding already dumped {store_type}..." + with get_progress_reporter()(desc=progress_desc, total=len(nodes)) as progress: for node in nodes: if node.uuid not in tracked_uuids: return_nodes.append(node) @@ -240,9 +242,10 @@ def _apply_behavioral_filters(self, nodes: list[orm.ProcessNode], store_type: st # Apply caller filter (keep top-level or explicitly grouped) filtered_nodes = [] - set_progress_bar_tqdm() + set_progress_bar_tqdm(bar_format=DUMP_PROGRESS_BAR_FORMAT) - with get_progress_reporter()(desc=f'Applying filters to {store_type}...', total=len(nodes)) as progress: + progress_desc = f"{click.style('Report', fg='blue', bold=True)}: Applying filters to {store_type}..." + with get_progress_reporter()(desc=progress_desc, total=len(nodes)) as progress: for node in nodes: is_sub_node = bool(getattr(node, 'caller', None)) is_explicitly_grouped = node.uuid in self.grouped_node_uuids diff --git a/src/aiida/tools/_dumping/executors/collection.py b/src/aiida/tools/_dumping/executors/collection.py index 8b2353eaf3..b46d573755 100644 --- a/src/aiida/tools/_dumping/executors/collection.py +++ b/src/aiida/tools/_dumping/executors/collection.py @@ -14,12 +14,14 @@ from pathlib import Path from typing import TYPE_CHECKING, Optional, Union +import click + from aiida import orm from aiida.common import AIIDA_LOGGER, NotExistent from aiida.common.progress_reporter import get_progress_reporter, set_progress_bar_tqdm from aiida.tools._dumping.detect import DumpChangeDetector from aiida.tools._dumping.tracking import DumpRecord, DumpTracker -from aiida.tools._dumping.utils import DumpChanges, DumpPaths, ProcessingQueue +from aiida.tools._dumping.utils import DUMP_PROGRESS_BAR_FORMAT, DumpChanges, DumpPaths, ProcessingQueue if TYPE_CHECKING: from aiida.tools._dumping.config import GroupDumpConfig, ProfileDumpConfig @@ -129,7 +131,7 @@ def _dump_nodes( :param group_context: _description_, defaults to None :param current_dump_root_for_nodes: _description_, defaults to None """ - set_progress_bar_tqdm() + set_progress_bar_tqdm(bar_format=DUMP_PROGRESS_BAR_FORMAT) nodes_to_dump = [] nodes_to_dump.extend(processing_queue.calculations) nodes_to_dump.extend(processing_queue.workflows) @@ -139,7 +141,6 @@ def _dump_nodes( desc = f'Dumping {len(nodes_to_dump)} nodes' if group_context: desc += f" for group '{group_context.label}'" - logger.report(desc) if current_dump_root_for_nodes is None: # This is a fallback, the caller should ideally always provide the explicit root. @@ -149,7 +150,8 @@ def _dump_nodes( current_dump_root_for_nodes = self.dump_paths.get_path_for_ungrouped_nodes() logger.warning(f'current_dump_root_for_nodes was None, derived as: {current_dump_root_for_nodes}') - with get_progress_reporter()(desc=desc, total=len(nodes_to_dump)) as progress: + progress_desc = f"{click.style('Report', fg='blue', bold=True)}: {desc}" + with get_progress_reporter()(desc=progress_desc, total=len(nodes_to_dump)) as progress: for node in nodes_to_dump: # Determine the specific, absolute path for this node's dump directory node_specific_dump_path = self.dump_paths.get_path_for_node( diff --git a/src/aiida/tools/_dumping/mapping.py b/src/aiida/tools/_dumping/mapping.py index d745ac9c91..399804d1c9 100644 --- a/src/aiida/tools/_dumping/mapping.py +++ b/src/aiida/tools/_dumping/mapping.py @@ -93,7 +93,7 @@ def build_from_db(cls, groups: Optional[Union[List[orm.Group], List[str], List[i else: group_uuids = [orm.load_group(g).uuid for g in groups] qb.append(orm.Group, tag='group', project=['uuid'], filters={'uuid': {'in': group_uuids}}) - LOGGER.report(f'Querying node memberships for {len(group_uuids)} groups...') + LOGGER.report(f'Querying node memberships for {len(group_uuids)} group(s)...') else: # Query all groups qb.append(orm.Group, tag='group', project=['uuid']) diff --git a/src/aiida/tools/_dumping/utils.py b/src/aiida/tools/_dumping/utils.py index e196852f78..02179621c7 100644 --- a/src/aiida/tools/_dumping/utils.py +++ b/src/aiida/tools/_dumping/utils.py @@ -11,15 +11,16 @@ from __future__ import annotations import os +import sys from dataclasses import dataclass, field from datetime import datetime from pathlib import Path from typing import Dict, List, Literal, Optional, Set, Type, Union -try: +if sys.version_info >= (3, 11): # typing.assert_never available since 3.11 from typing import assert_never -except ImportError: +else: from typing_extensions import assert_never from aiida import orm @@ -29,6 +30,8 @@ RegistryNameType = Literal['calculations', 'workflows', 'groups'] +# Progress bar format for dump operations - wider description field to avoid truncation +DUMP_PROGRESS_BAR_FORMAT = '{desc:60.60}{percentage:6.1f}%|{bar}| {n_fmt}/{total_fmt}' REGISTRY_TO_ORM_TYPE: dict[str, Type[Union[orm.CalculationNode, orm.WorkflowNode, orm.Group]]] = { 'calculations': orm.CalculationNode, @@ -47,6 +50,7 @@ } __all__ = ( + 'DUMP_PROGRESS_BAR_FORMAT', 'ORM_TYPE_TO_REGISTRY', 'REGISTRY_TO_ORM_TYPE', 'DumpMode',