diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8e9147a..551454d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,9 @@
 # Changelog
 
+## Version 13.3.0 - 2024-06-27
+
+- Added `workflows execute-all` which starts an execution on all documents in a dataset
+
 ## Version 13.2.2 - 2024-06-13
 
 - Bugfix `models update-training` now works as intended when specifying `--deployment-environment-id`
diff --git a/lascli/__version__.py b/lascli/__version__.py
index 07e6414..458dd32 100644
--- a/lascli/__version__.py
+++ b/lascli/__version__.py
@@ -7,4 +7,4 @@
 __maintainer_email__ = 'magnus@lucidtech.ai'
 __title__ = 'lucidtech-las-cli'
 __url__ = 'https://github.com/LucidtechAI/las-cli'
-__version__ = '13.2.2'
+__version__ = '13.3.0'
diff --git a/lascli/parser/datasets.py b/lascli/parser/datasets.py
index bb3cf0a..dba9b2d 100644
--- a/lascli/parser/datasets.py
+++ b/lascli/parser/datasets.py
@@ -121,7 +121,7 @@ def _get_document_worker(las_client: Client, document_id, output_dir):
         return None
 
 
-def _list_all_documents_in_dataset(las_client: Client, dataset_id):
+def list_all_documents_in_dataset(las_client: Client, dataset_id):
     list_response = las_client.list_documents(dataset_id=dataset_id)
     yield from list_response['documents']
     next_token = list_response.get('nextToken')
@@ -332,7 +332,7 @@ def get_documents(las_client: Client, dataset_id, output_dir, num_threads, chunk
     already_downloaded_from_dataset = set()
     with ThreadPoolExecutor(max_workers=num_threads) as executor:
         documents = []
-        for document in _list_all_documents_in_dataset(las_client, dataset_id):
+        for document in list_all_documents_in_dataset(las_client, dataset_id):
             if document['documentId'] in already_downloaded:
                 already_downloaded_from_dataset.add(document['documentId'])
             else:
@@ -480,7 +480,7 @@ def create_datasets_parser(subparsers):
                 "options": {} (optional)
             },
             ...
-        ]
+        ] Examples: [{"type": "remove-duplicates", "options": {}}]
    '''))
diff --git a/lascli/parser/workflows.py b/lascli/parser/workflows.py
index f34f9a9..763123c 100644
--- a/lascli/parser/workflows.py
+++ b/lascli/parser/workflows.py
@@ -2,13 +2,16 @@
 import json
 import pathlib
 import textwrap
+import time
 from argparse import RawTextHelpFormatter
+from functools import partial
 
 import dateparser
 from las import Client
 
-from lascli.util import nullable, NotProvided, json_path, json_or_json_path
+from .datasets import list_all_documents_in_dataset
 from lascli.actions import workflows
+from lascli.util import nullable, NotProvided, json_path, json_or_json_path
 
 
 def list_workflows(las_client: Client, **optional_args):
@@ -32,6 +35,21 @@ def execute_workflow(las_client: Client, workflow_id, path):
     return las_client.execute_workflow(workflow_id, content)
 
 
+def execute_all_workflow(las_client: Client, workflow_id, dataset_id):
+    executions = []
+    for i, document in enumerate(list_all_documents_in_dataset(las_client, dataset_id)):
+        content = {'documentId': document['documentId'], 'source': 'CLI', 'initialSleepInSeconds': i * 4}
+        if original_file_path := (document.get('metadata') or {}).get('originalFilePath'):
+            file_path = pathlib.Path(original_file_path)
+            content['title'] = file_path.name
+        execution = las_client.execute_workflow(workflow_id, content)
+        executions.append(execution)
+        print(f'Execution {execution["executionId"]} started on {document["documentId"]}')
+        time.sleep(1)
+
+    return f'Started {len(executions)} executions'
+
+
 def list_workflow_executions(las_client: Client, workflow_id, **optional_args):
     return las_client.list_workflow_executions(workflow_id, **optional_args)
 
@@ -196,6 +214,11 @@ def create_workflows_parser(subparsers):
     execute_workflow_parser.add_argument('path', help='path to json-file with input to the first state of the workflow')
     execute_workflow_parser.set_defaults(cmd=execute_workflow)
 
+    execute_workflow_parser = subparsers.add_parser('execute-all')
+    execute_workflow_parser.add_argument('workflow_id')
+    execute_workflow_parser.add_argument('dataset_id', help='Start execution on all documents in dataset')
+    execute_workflow_parser.set_defaults(cmd=execute_all_workflow)
+
     list_executions_parser = subparsers.add_parser('list-executions')
     list_executions_parser.add_argument('workflow_id')
     list_executions_parser.add_argument('--status', '-s', nargs='+', help='Only return those with the given status')
diff --git a/tests/test_workflow_executions.py b/tests/test_workflow_executions.py
index 6520f41..7ea229f 100644
--- a/tests/test_workflow_executions.py
+++ b/tests/test_workflow_executions.py
@@ -12,6 +12,16 @@ def test_executions_create(parser, client):
     util.main_parser(parser, client, args)
 
 
+def test_executions_create_all(parser, client):
+    args = [
+        'workflows',
+        'execute-all',
+        service.create_workflow_id(),
+        service.create_dataset_id(),
+    ]
+    util.main_parser(parser, client, args)
+
+
 @pytest.mark.parametrize('sort_by', [
     ('--sort-by', 'startTime'),
     ('--sort-by', 'endTime'),
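
For quick reference, a minimal sketch of exercising the new code path programmatically rather than through the CLI. It assumes `las.Client()` can resolve credentials from the usual configuration, and both IDs are placeholders, not real resources:

```python
# Hypothetical usage sketch: call the new helper directly instead of
# running `workflows execute-all <workflow_id> <dataset_id>` via the CLI.
# Assumes las.Client() picks up credentials from the standard config;
# the two IDs below are placeholders.
from las import Client

from lascli.parser.workflows import execute_all_workflow

client = Client()
summary = execute_all_workflow(
    las_client=client,
    workflow_id='<workflow_id>',  # placeholder
    dataset_id='<dataset_id>',    # placeholder
)
print(summary)  # 'Started N executions', one per document in the dataset
```

Note the built-in pacing: the i-th execution is given an `initialSleepInSeconds` of `i * 4` and the loop sleeps one second between requests, so a large dataset ramps up gradually instead of hitting the workflow API all at once.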