diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index dbefe19ec9..c6937bac3a 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -2,7 +2,8 @@ name: "Code formatting" on: push: - branches: [ "main", "mlperf-inference" ] + branches: + - "**" env: python_version: "3.9" @@ -10,7 +11,6 @@ env: jobs: format-code: runs-on: ubuntu-latest - if: github.repository_owner == 'mlcommons' steps: - uses: actions/checkout@v4 with: diff --git a/.github/workflows/run-individual-script-tests.yml b/.github/workflows/run-individual-script-tests.yml index e6c3db4b5a..c9fd7f5995 100644 --- a/.github/workflows/run-individual-script-tests.yml +++ b/.github/workflows/run-individual-script-tests.yml @@ -32,6 +32,6 @@ jobs: for file in ${{ steps.getfile.outputs.files }}; do echo $file done - python3 -m pip install cmind + python3 -m pip install "cmind @ git+https://git@github.com/mlcommons/ck.git@mlperf-inference#subdirectory=cm" cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} DOCKER_CM_REPO=${{ github.event.pull_request.head.repo.html_url }} DOCKER_CM_REPO_BRANCH=${{ github.event.pull_request.head.ref }} TEST_INPUT_INDEX=${{ matrix.test-input-index }} python3 tests/script/process_tests.py ${{ steps.getfile.outputs.files }} diff --git a/.github/workflows/test-cm-based-submission-generation.yml b/.github/workflows/test-cm-based-submission-generation.yml index 4bd9d94304..d62f8e59be 100644 --- a/.github/workflows/test-cm-based-submission-generation.yml +++ b/.github/workflows/test-cm-based-submission-generation.yml @@ -50,7 +50,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python3 -m pip install cmind + python3 -m pip install "cmind @ git+https://git@github.com/mlcommons/ck.git@mlperf-inference#subdirectory=cm" cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ 
github.event.pull_request.head.ref }} - name: Pull repo where test cases are uploaded run: | diff --git a/.github/workflows/test-cm-script-features.yml b/.github/workflows/test-cm-script-features.yml index ec0c1c7a9f..d793f93346 100644 --- a/.github/workflows/test-cm-script-features.yml +++ b/.github/workflows/test-cm-script-features.yml @@ -11,14 +11,14 @@ on: - '!**.md' jobs: - build: + test_cm_script_features: - runs-on: ${{ matrix.on }} + runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: python-version: ["3.12", "3.8"] - on: ["ubuntu-latest", "windows-latest", "macos-latest"] + os: ["ubuntu-latest", "windows-latest", "macos-latest"] steps: - uses: actions/checkout@v4 @@ -26,9 +26,13 @@ jobs: uses: actions/setup-python@v3 with: python-version: ${{ matrix.python-version }} + - name: Configure git longpaths (Windows) + if: matrix.os == 'windows-latest' + run: | + git config --system core.longpaths true - name: Install dependencies run: | - python -m pip install cmind + python -m pip install "cmind @ git+https://git@github.com/mlcommons/ck.git@mlperf-inference#subdirectory=cm" cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} cm run script --quiet --tags=get,sys-utils-cm - name: Run test_docker on linux diff --git a/.github/workflows/test-cm-tutorial-retinanet.yml b/.github/workflows/test-cm-tutorial-retinanet.yml index aca0248fb2..2899a26d14 100644 --- a/.github/workflows/test-cm-tutorial-retinanet.yml +++ b/.github/workflows/test-cm-tutorial-retinanet.yml @@ -28,7 +28,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install cmind + python -m pip install "cmind @ git+https://git@github.com/mlcommons/ck.git@mlperf-inference#subdirectory=cm" cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} cm run script --quiet --tags=get,sys-utils-cm - name: Test CM 
Tutorial Retinanet diff --git a/.github/workflows/test-cm-tutorial-tvm-pip.yml b/.github/workflows/test-cm-tutorial-tvm-pip.yml index 89a6d1d662..ce18079c49 100644 --- a/.github/workflows/test-cm-tutorial-tvm-pip.yml +++ b/.github/workflows/test-cm-tutorial-tvm-pip.yml @@ -29,7 +29,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install cmind + python -m pip install "cmind @ git+https://git@github.com/mlcommons/ck.git@mlperf-inference#subdirectory=cm" cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} cm run script --quiet --tags=get,sys-utils-cm - name: Test CM Tutorial TVM pip install with VirtualMachine Runtime @@ -51,7 +51,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install cmind + python -m pip install "cmind @ git+https://git@github.com/mlcommons/ck.git@mlperf-inference#subdirectory=cm" cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} cm run script --quiet --tags=get,sys-utils-cm - name: Test CM Tutorial TVM pip install with GraphExecutor Runtime diff --git a/.github/workflows/test-cm-tutorial-tvm.yml b/.github/workflows/test-cm-tutorial-tvm.yml index 0edb3142fb..d5d2937ea4 100644 --- a/.github/workflows/test-cm-tutorial-tvm.yml +++ b/.github/workflows/test-cm-tutorial-tvm.yml @@ -30,7 +30,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install cmind + python -m pip install "cmind @ git+https://git@github.com/mlcommons/ck.git@mlperf-inference#subdirectory=cm" cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} cm run script --quiet --tags=get,sys-utils-cm - name: Test CM Tutorial TVM diff --git a/.github/workflows/test-image-classification-onnx.yml 
b/.github/workflows/test-image-classification-onnx.yml index 44f27a6167..6d8bc91d54 100644 --- a/.github/workflows/test-image-classification-onnx.yml +++ b/.github/workflows/test-image-classification-onnx.yml @@ -5,7 +5,7 @@ name: image classification with ONNX on: pull_request: - branches: [ "main", "dev" ] + branches: [ "main", "dev", "mlperf-inference" ] paths: - '.github/workflows/test-image-classification-onnx.yml' - '**' @@ -26,11 +26,15 @@ jobs: uses: actions/setup-python@v3 with: python-version: ${{ matrix.python-version }} + - name: Configure git longpaths (Windows) + if: matrix.os == 'windows-latest' + run: | + git config --system core.longpaths true - name: Install dependencies run: | - python3 -m pip install cmind + python3 -m pip install "cmind @ git+https://git@github.com/mlcommons/ck.git@mlperf-inference#subdirectory=cm" cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} cm run script --quiet --tags=get,sys-utils-cm - name: Test image classification with ONNX run: | - cmr "python app image-classification onnx" --quiet + cm run script --tags=python,app,image-classification,onnx --quiet diff --git a/.github/workflows/test-mlperf-inference-abtf-poc.yml b/.github/workflows/test-mlperf-inference-abtf-poc.yml index de62093015..50a7909458 100644 --- a/.github/workflows/test-mlperf-inference-abtf-poc.yml +++ b/.github/workflows/test-mlperf-inference-abtf-poc.yml @@ -47,9 +47,13 @@ jobs: uses: actions/setup-python@v3 with: python-version: ${{ matrix.python-version }} + - name: Configure git longpaths (Windows) + if: matrix.os == 'windows-latest' + run: | + git config --system core.longpaths true - name: Install dependencies run: | - pip install cmind + pip install "cmind @ git+https://git@github.com/mlcommons/ck.git@mlperf-inference#subdirectory=cm" cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} cm pull repo 
mlcommons@cm4abtf --branch=poc diff --git a/.github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml b/.github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml index 7ac57d8e70..3594aaf86f 100644 --- a/.github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml +++ b/.github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml @@ -37,7 +37,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python3 -m pip install cmind + python3 -m pip install "cmind @ git+https://git@github.com/mlcommons/ck.git@mlperf-inference#subdirectory=cm" cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} - name: Test MLPerf Inference Bert ${{ matrix.backend }} on ${{ matrix.os }} if: matrix.os == 'windows-latest' diff --git a/.github/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml b/.github/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml index 4b71896296..ff856ad549 100644 --- a/.github/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml +++ b/.github/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml @@ -34,7 +34,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python3 -m pip install cmind + python3 -m pip install "cmind @ git+https://git@github.com/mlcommons/ck.git@mlperf-inference#subdirectory=cm" cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} cm run script --quiet --tags=get,sys-utils-cm cm run script --quiet --tags=install,prebuilt,llvm --version=${{ matrix.llvm-version }} diff --git a/.github/workflows/test-mlperf-inference-resnet50.yml b/.github/workflows/test-mlperf-inference-resnet50.yml index 5e1f00c478..4388e4bb9a 100644 --- a/.github/workflows/test-mlperf-inference-resnet50.yml +++ 
b/.github/workflows/test-mlperf-inference-resnet50.yml @@ -35,14 +35,17 @@ jobs: uses: actions/setup-python@v3 with: python-version: ${{ matrix.python-version }} + - name: Configure git longpaths (Windows) + if: matrix.os == 'windows-latest' + run: | + git config --system core.longpaths true - name: Install dependencies run: | - python3 -m pip install cmind + pip install "cmind @ git+https://git@github.com/mlcommons/ck.git@mlperf-inference#subdirectory=cm" cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} - name: Test MLPerf Inference ResNet50 (Windows) if: matrix.os == 'windows-latest' run: | - git config --system core.longpaths true cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=resnet50 --adr.loadgen.tags=_from-pip --pip_loadgen=yes --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --target_qps=1 -v --quiet - name: Test MLPerf Inference ResNet50 (Linux/macOS) if: matrix.os != 'windows-latest' diff --git a/.github/workflows/test-mlperf-inference-retinanet.yml b/.github/workflows/test-mlperf-inference-retinanet.yml index 5077ad19e7..eac9346fea 100644 --- a/.github/workflows/test-mlperf-inference-retinanet.yml +++ b/.github/workflows/test-mlperf-inference-retinanet.yml @@ -35,14 +35,17 @@ jobs: uses: actions/setup-python@v3 with: python-version: ${{ matrix.python-version }} + - name: Configure git longpaths (Windows) + if: matrix.os == 'windows-latest' + run: | + git config --system core.longpaths true - name: Install dependencies run: | - python3 -m pip install cmind + python3 -m pip install "cmind @ git+https://git@github.com/mlcommons/ck.git@mlperf-inference#subdirectory=cm" cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} - name: Test MLPerf Inference 
Retinanet using ${{ matrix.backend }} on ${{ matrix.os }} if: matrix.os == 'windows-latest' run: | - git config --system core.longpaths true cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --hw_name=gh_${{ matrix.os }} --model=retinanet --adr.loadgen.tags=_from-pip --pip_loadgen=yes --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=5 --quiet -v --target_qps=1 - name: Test MLPerf Inference Retinanet using ${{ matrix.backend }} on ${{ matrix.os }} if: matrix.os != 'windows-latest' diff --git a/.github/workflows/test-mlperf-inference-rnnt.yml b/.github/workflows/test-mlperf-inference-rnnt.yml index 08123eb924..08b681fea1 100644 --- a/.github/workflows/test-mlperf-inference-rnnt.yml +++ b/.github/workflows/test-mlperf-inference-rnnt.yml @@ -30,7 +30,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python3 -m pip install cmind + python3 -m pip install "cmind @ git+https://git@github.com/mlcommons/ck.git@mlperf-inference#subdirectory=cm" cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} cm run script --quiet --tags=get,sys-utils-cm - name: Test MLPerf Inference RNNT diff --git a/.github/workflows/test-mlperf-inference-tvm.yml b/.github/workflows/test-mlperf-inference-tvm.yml index a3189079e3..e07d4ce113 100644 --- a/.github/workflows/test-mlperf-inference-tvm.yml +++ b/.github/workflows/test-mlperf-inference-tvm.yml @@ -30,7 +30,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python3 -m pip install cmind + python3 -m pip install "cmind @ git+https://git@github.com/mlcommons/ck.git@mlperf-inference#subdirectory=cm" cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} cm run script --quiet 
--tags=get,sys-utils-cm - name: MLPerf Inference ResNet50 using TVM diff --git a/.github/workflows/test-mlperf-loadgen-onnx-huggingface-bert-fp32-squad.yml b/.github/workflows/test-mlperf-loadgen-onnx-huggingface-bert-fp32-squad.yml index 597121fb0d..ac4922f3c6 100644 --- a/.github/workflows/test-mlperf-loadgen-onnx-huggingface-bert-fp32-squad.yml +++ b/.github/workflows/test-mlperf-loadgen-onnx-huggingface-bert-fp32-squad.yml @@ -28,7 +28,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python3 -m pip install cmind + python3 -m pip install "cmind @ git+https://git@github.com/mlcommons/ck.git@mlperf-inference#subdirectory=cm" cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} - name: Test MLPerf loadgen with HuggingFace bert onnx fp32 squad model run: | diff --git a/.github/workflows/test-qaic-compute-sdk-build.yml b/.github/workflows/test-qaic-compute-sdk-build.yml index b2e774a440..0bf4f49280 100644 --- a/.github/workflows/test-qaic-compute-sdk-build.yml +++ b/.github/workflows/test-qaic-compute-sdk-build.yml @@ -26,7 +26,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python3 -m pip install cmind + python3 -m pip install "cmind @ git+https://git@github.com/mlcommons/ck.git@mlperf-inference#subdirectory=cm" cm pull repo mlcommons@ck cm run script --tags=get,sys-utils-cm --quiet diff --git a/.github/workflows/test-qaic-software-kit.yml b/.github/workflows/test-qaic-software-kit.yml index 4b877da008..febc14b4c7 100644 --- a/.github/workflows/test-qaic-software-kit.yml +++ b/.github/workflows/test-qaic-software-kit.yml @@ -31,7 +31,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python3 -m pip install cmind + python3 -m pip install "cmind @ git+https://git@github.com/mlcommons/ck.git@mlperf-inference#subdirectory=cm" cm pull repo mlcommons@ck cm run script 
--tags=get,sys-utils-cm --quiet diff --git a/README.md b/README.md index 843040705d..111817db26 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![License](https://img.shields.io/badge/License-Apache%202.0-green)](LICENSE.md) [![Python Version](https://img.shields.io/badge/python-3+-blue.svg)](https://github.com/mlcommons/ck/tree/master/cm/cmind) -[![Powered by CM](https://img.shields.io/badge/Powered_by-MLCommons%20CM-blue)](https://github.com/mlcommons/ck). +[![Powered by CM](https://img.shields.io/badge/Powered_by-MLCommons%20CM-blue)](https://pypi.org/project/cmind). [![Downloads](https://static.pepy.tech/badge/cm4mlops)](https://pepy.tech/project/cm4mlops) [![CM script automation features test](https://github.com/mlcommons/cm4mlops/actions/workflows/test-cm-script-features.yml/badge.svg)](https://github.com/mlcommons/cm4mlops/actions/workflows/test-cm-script-features.yml) @@ -13,35 +13,40 @@ [![Test QAIC Software kit Compilation](https://github.com/mlcommons/cm4mlops/actions/workflows/test-qaic-software-kit.yml/badge.svg)](https://github.com/mlcommons/cm4mlops/actions/workflows/test-qaic-software-kit.yml) -Please see the [docs](https://docs.mlcommons.org/cm4mlops/) site for understanding CM scripts better. The `mlperf-branch` of the **cm4mlops** repository contains updated CM scripts specifically for MLPerf Inference. For more information on using CM for MLPerf Inference, visit the [MLPerf Inference Documentation site](https://docs.mlcommons.org/inference/). +# CM4MLOps repository -## News +**CM4MLOps** repository is powered by the [Collective Mind automation framework](https://github.com/mlcommons/ck/tree/master/cm), +a [Python package](https://pypi.org/project/cmind/) with a CLI and API designed for creating and managing automations. 
-* [Upcoming Changes](https://github.com/mlcommons/cm4mlops/discussions/categories/announcements) -* [Ongoing Discussions](https://github.com/mlcommons/cm4mlops/discussions/categories/ideas) +Two key automations developed using CM are **Script** and **Cache**, which streamline machine learning (ML) workflows, +including managing Docker runs. Both Script and Cache automations are part of the **cm4mlops** repository. -## License +The CM scripts, also housed in this repository, consist of hundreds of modular Python-wrapped scripts accompanied +by `yaml` metadata, enabling the creation of robust and flexible ML workflows. -[Apache 2.0](LICENSE.md) +- **CM Scripts Documentation**: [https://docs.mlcommons.org/cm4mlops/](https://docs.mlcommons.org/cm4mlops/) +- **CM CLI Documentation**: [https://docs.mlcommons.org/ck/specs/cm-cli/](https://docs.mlcommons.org/ck/specs/cm-cli/) -## CM concepts +The `mlperf-inference` branch of the **cm4mlops** repository is dedicated to developments specific to MLPerf Inference. +Please submit any pull requests (PRs) to this branch. For more information about using CM for MLPerf Inference, +refer to the [MLPerf Inference Documentation](https://docs.mlcommons.org/inference/). -Check our [ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339). +## News -## Authors +* [Ongoing Discussions](https://github.com/mlcommons/cm4mlops/discussions) -[Grigori Fursin](https://cKnowledge.org/gfursin) and [Arjun Suresh](https://www.linkedin.com/in/arjunsuresh) +## License + +[Apache 2.0](LICENSE.md) -## Major script developers +## CM concepts -Arjun Suresh, Anandhu S, Grigori Fursin +Check our [ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) and the [white paper](https://arxiv.org/abs/2406.16791). -## Funding +## CM script developers -We thank [cKnowledge.org](https://cKnowledge.org), [cTuning foundation](https://cTuning.org) -and [MLCommons](https://mlcommons.org) for sponsoring this project! 
+Arjun Suresh, Anandhu Sooraj, Grigori Fursin -## Acknowledgments +## Parent project -We thank all [volunteers, collaborators and contributors](https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md) -for their support, fruitful discussions, and useful feedback! +Visit the [parent Collective Knowledge project](https://github.com/mlcommons/ck) for further details. diff --git a/automation/cache/module.py b/automation/cache/module.py index 6720fe5779..b205b539f2 100644 --- a/automation/cache/module.py +++ b/automation/cache/module.py @@ -110,6 +110,12 @@ def show(self, i): # for artifact in lst: path = artifact.path meta = artifact.meta + dependent_cached_path = meta.get( + 'dependent_cached_path', '') + if dependent_cached_path and not os.path.exists( + dependent_cached_path): + continue + original_meta = artifact.original_meta alias = meta.get('alias', '') diff --git a/automation/script/module.py b/automation/script/module.py index 97707b9ced..d63f607710 100644 --- a/automation/script/module.py +++ b/automation/script/module.py @@ -1170,6 +1170,30 @@ def _run(self, i): local_env_keys_from_meta = meta.get('local_env_keys', []) + # Check if has customize.py + path_to_customize_py = os.path.join(path, 'customize.py') + customize_code = None + customize_common_input = {} + + if os.path.isfile(path_to_customize_py) and cache: + r = utils.load_python_module( + {'path': path, 'name': 'customize'}) + if r['return'] > 0: + return r + + customize_code = r['code'] + + customize_common_input = { + 'input': i, + 'automation': self, + 'artifact': script_artifact, + 'customize': script_artifact.meta.get('customize', {}), + 'os_info': os_info, + 'recursion_spaces': recursion_spaces, + 'script_tags': script_tags, + 'variation_tags': variation_tags + } + ####################################################################### # Check if script is cached if we need to skip deps from cached entries this_script_cached = False @@ -1182,9 +1206,15 @@ def _run(self, i): r = 
find_cached_script({'self': self, 'recursion_spaces': recursion_spaces, + 'extra_recursion_spaces': extra_recursion_spaces, + 'add_deps_recursive': add_deps_recursive, 'script_tags': script_tags, 'found_script_tags': found_script_tags, + 'found_script_path': path, + 'customize_code': customize_code, + 'customize_common_input': customize_common_input, 'variation_tags': variation_tags, + 'variation_tags_string': variation_tags_string, 'explicit_variation_tags': explicit_variation_tags, 'version': version, 'version_min': version_min, @@ -1193,10 +1223,14 @@ def _run(self, i): 'new_cache_entry': new_cache_entry, 'meta': meta, 'env': env, + 'state': state, + 'const': const, + 'const_state': const_state, 'skip_remembered_selections': skip_remembered_selections, 'remembered_selections': remembered_selections, 'quiet': quiet, - 'verbose': verbose + 'verbose': verbose, + 'show_time': show_time }) if r['return'] > 0: return r @@ -1366,6 +1400,11 @@ def _run(self, i): cached_tags.append(x) if not found_cached and num_found_cached_scripts == 0: + if i.get('only_execute_from_cache'): + # useful to check valid cache entries for a script (cm show + # cache can return invalid cache entries for a script too) + return { + 'return': 1, 'error': f'No valid cache entry found for {cached_tags}'} # If not cached, create cached script artifact and mark as tmp # (remove if cache successful) @@ -1587,10 +1626,6 @@ def _run(self, i): # Clean some output files clean_tmp_files(clean_files, recursion_spaces) - # Check if has customize.py - path_to_customize_py = os.path.join(path, 'customize.py') - customize_code = None - # Prepare common input to prepare and run script run_script_input = { 'path': path, @@ -1620,13 +1655,8 @@ def _run(self, i): 'meta': meta, 'self': self } - - if repro_prefix != '': - run_script_input['repro_prefix'] = repro_prefix - if ignore_script_error: - run_script_input['ignore_script_error'] = True - - if os.path.isfile(path_to_customize_py): + if os.path.isfile( + 
path_to_customize_py): # possible duplicate execution - needs fix r = utils.load_python_module( {'path': path, 'name': 'customize'}) if r['return'] > 0: @@ -1644,10 +1674,14 @@ def _run(self, i): 'script_tags': script_tags, 'variation_tags': variation_tags } - run_script_input['customize_code'] = customize_code run_script_input['customize_common_input'] = customize_common_input + if repro_prefix != '': + run_script_input['repro_prefix'] = repro_prefix + if ignore_script_error: + run_script_input['ignore_script_error'] = True + # Assemble PIP versions pip_version_string = '' @@ -1945,10 +1979,9 @@ def _run(self, i): cached_meta['associated_script_artifact_uid'] = found_script_artifact[x + 1:] - # Check if the cached entry is dependent on any other cached - # entry + # Check if the cached entry is dependent on any path if dependent_cached_path != '': - if os.path.isdir(cached_path) and os.path.isdir( + if os.path.isdir(cached_path) and os.path.exists( dependent_cached_path): if not os.path.samefile( cached_path, dependent_cached_path): @@ -2120,7 +2153,7 @@ def _run(self, i): 'new_env': new_env, 'state': state, 'new_state': new_state, - 'deps': run_state['deps']} + 'deps': run_state.get('deps')} # Print output as json to console if i.get('json', False) or i.get('j', False): @@ -3568,18 +3601,22 @@ def _run_deps(self, deps, clean_env_keys_deps, env, state, const, const_state, a # deps should have non-empty tags d['tags'] += "," + new_variation_tags_string - run_state['deps'].append(d['tags']) + if run_state: + run_state['deps'].append(d['tags']) - if not run_state['fake_deps']: + if not run_state.get('fake_deps'): import copy - run_state_copy = copy.deepcopy(run_state) - run_state_copy['deps'] = [] + if not run_state: + run_state_copy = {} + else: + run_state_copy = copy.deepcopy(run_state) + run_state_copy['deps'] = [] - run_state_copy['parent'] = run_state['script_id'] + run_state_copy['parent'] = run_state['script_id'] - if len(run_state['script_variation_tags']) > 
0: - run_state_copy['parent'] += " ( " + ',_'.join( - run_state['script_variation_tags']) + " )" + if len(run_state['script_variation_tags']) > 0: + run_state_copy['parent'] += " ( " + ',_'.join( + run_state['script_variation_tags']) + " )" # Run collective script via CM API: # Not very efficient but allows logging - can be optimized @@ -3873,6 +3910,12 @@ def run_native_script(self, i): run_script_input = i['run_script_input'] script_name = i['script_name'] env = i.get('env', '') + detect_version = i.get('detect_version', '') + + if detect_version: + postprocess = "detect_version" + else: + postprocess = "" # Create and work on a copy to avoid contamination env_copy = copy.deepcopy(run_script_input.get('env', {})) @@ -3884,7 +3927,7 @@ def run_native_script(self, i): run_script_input['env'] = env r = prepare_and_run_script_with_postprocessing( - run_script_input, postprocess="") + run_script_input, postprocess=postprocess) env_tmp = copy.deepcopy(run_script_input['env']) r['env_tmp'] = env_tmp @@ -4850,22 +4893,31 @@ def find_cached_script(i): import copy recursion_spaces = i['recursion_spaces'] + extra_recursion_spaces = i['extra_recursion_spaces'] script_tags = i['script_tags'] cached_tags = [] + customize_code = i.get('customize_code') + customize_common_input = i.get('customize_common_input', {}) found_script_tags = i['found_script_tags'] variation_tags = i['variation_tags'] + variation_tags_string = i['variation_tags_string'] explicit_variation_tags = i['explicit_variation_tags'] version = i['version'] version_min = i['version_min'] version_max = i['version_max'] extra_cache_tags = i['extra_cache_tags'] + add_deps_recursive = i['add_deps_recursive'] new_cache_entry = i['new_cache_entry'] meta = i['meta'] env = i['env'] + state = i['state'] + const = i['const'] + const_state = i['const_state'] self_obj = i['self'] skip_remembered_selections = i['skip_remembered_selections'] remembered_selections = i['remembered_selections'] quiet = i['quiet'] + show_time = 
i.get('show_time', False) search_tags = '' verbose = i.get('verbose', False) @@ -5020,8 +5072,68 @@ def find_cached_script(i): skip_cached_script = True continue + os_info = self_obj.os_info + + # Bat extension for this host OS + bat_ext = os_info['bat_ext'] + script_path = i['found_script_path'] + detected_version = None + + if os.path.exists(os.path.join(script_path, + f"validate_cache{bat_ext}")): + run_script_input = { + 'path': script_path, + 'bat_ext': bat_ext, + 'os_info': os_info, + 'recursion_spaces': recursion_spaces, + 'tmp_file_run': self_obj.tmp_file_run, + 'self': self_obj, + 'meta': meta, + 'customize_code': customize_code, + 'customize_common_input': customize_common_input + } + + deps = meta.get('deps') + if deps: + r = self_obj._call_run_deps(deps, self_obj.local_env_keys, meta.get('local_env_keys', []), env, state, const, const_state, add_deps_recursive, + recursion_spaces + extra_recursion_spaces, + remembered_selections, variation_tags_string, True, '', False, show_time, extra_recursion_spaces, {}) + if r['return'] > 0: + return r + + # Check if pre-process and detect + # if 'preprocess' in dir(customize_code): + + # logging.debug(recursion_spaces + ' - Running preprocess ...') + + # ii = copy.deepcopy(customize_common_input) + # ii['env'] = env + # ii['meta'] = meta + # # may need to detect versions in multiple paths + # ii['run_script_input'] = run_script_input + + # r = customize_code.preprocess(ii) + # if r['return'] > 0: + # return r + + ii = { + 'run_script_input': run_script_input, + 'env': env, + 'script_name': 'validate_cache', + 'detect_version': True + } + r = self_obj.run_native_script(ii) + # print(r) + if r['return'] > 0: + # return r + continue + if r.get('version'): + detected_version = r['version'] + if not skip_cached_script: cached_script_version = cached_script.meta.get('version', '') + if cached_script_version and detected_version and cached_script_version != detected_version: + continue skip_cached_script = check_versions( 
self_obj.cmind, cached_script_version, version_min, version_max) diff --git a/docs/cm-yaml-guide.md b/docs/cm-yaml-guide.md new file mode 100644 index 0000000000..2b0b1242b0 --- /dev/null +++ b/docs/cm-yaml-guide.md @@ -0,0 +1,46 @@ +This README provides a walkthrough of the `_cm.yaml` file. + +## Keys and Datatypes followed + +1. **alias**: `string` +2. **uid**: `string` +3. **automation_alias**: `string` +4. **automation_uid**: `string` +5. **category**: `string` +6. **developers**: `list of strings` +7. **tags**: `list of strings` +8. **default_env**: `dictionary` - Contains key-value pairs where values are `strings` +9. **env**: `dictionary` - Contains key-value pairs where values are `strings` +10. **input_mapping**: `dictionary` - Contains key-value pairs where values are `strings` +11. **env_key_mapping**: `dictionary` - Contains key-value pairs where values are `strings` +12. **new_env_keys**: `list of strings` +13. **new_state_keys**: `list of strings` +14. **deps**: `list of dictionaries` - Each dictionary can contain `tags` or other nested keys +15. **names**: `list of strings` +16. **enable_if_env**: `dictionary` - Contains key-value pairs where values are lists of `strings` +17. **skip_if_env**: `dictionary` - Contains key-value pairs where values are lists of `strings` +18. **prehook_deps**: `list of dictionaries` - Each dictionary may contain `names` and `tags` as lists +19. **posthook_deps**: `list of dictionaries` - Each dictionary may contain `tags` and other keys +20. **variation_groups_order**: `list of strings` +21. **variations**: `dictionary` - Each variation is a dictionary containing keys like `alias`, `default_variations`, `group`, etc. +22. **group**: `string` +23. **add_deps_recursive**: `dictionary` - Contains nested `tags` and other keys +24. **default_variations**: `dictionary` - Contains key-value pairs where values are `strings` +25. 
**docker**: `dictionary` - Contains keys specific to Docker configurations: + - **base_image**: `string` + - **image_name**: `string` + - **os**: `string` + - **os_version**: `string` + - **deps**: `list of dictionaries` - Each dictionary can include `tags` or other keys. + - **env**: `dictionary` - Contains key-value pairs where values are `strings` + - **interactive**: `boolean` + - **extra_run_args**: `string` + - **mounts**: `list of strings` - Specifies mount paths in the format `"source:destination"` + - **pre_run_cmds**: `list of strings` - Commands to run before the container starts + - **docker_input_mapping**: `dictionary` - Contains key-value pairs where values are strings, mapping input parameters to Docker environment variables + - **use_host_user_id**: `boolean` + - **use_host_group_id**: `boolean` + - **skip_run_cmd**: `string` + - **shm_size**: `string` + - **real_run**: `boolean` + - **all_gpus**: `string` diff --git a/pyproject.toml b/pyproject.toml index e55cac00a6..c05abc8ab1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,2 +1,2 @@ [build-system] -requires = ["setuptools>=60", "wheel", "cmind @ git+https://git@github.com/mlcommons/ck.git@084d4d6171a6e7ae9582a63777fbd19afa19947a#egg=cmind&subdirectory=cm"] +requires = ["setuptools>=60", "wheel", "cmind"] diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml index f10c09daf5..ebf1ed92f3 100644 --- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -479,6 +479,18 @@ deps: - 'on' + ## RGAT + - tags: get,ml-model,rgat + names: + - ml-model + - rgat-model + enable_if_env: + CM_MODEL: + - rgat + skip_if_env: + RGAT_CHECKPOINT_PATH: + - 'on' + ######################################################################## # Install datasets @@ -600,6 +612,15 @@ deps: CM_CRITEO_PREPROCESSED_PATH: - on + ## igbh for rgat + - tags: get,dataset,mlperf,inference,igbh + 
names: + - igbh-dataset + - illinois-graph-benchmark-heterogeneous + enable_if_env: + CM_MODEL: + - rgat + ######################################################################## # Install MLPerf inference dependencies @@ -1196,6 +1217,32 @@ variations: - tf - tflite + rgat: + group: models + env: + CM_MODEL: rgat + deps: + - tags: get,generic-python-lib,_package.colorama + - tags: get,generic-python-lib,_package.tqdm + - tags: get,generic-python-lib,_package.requests + - tags: get,generic-python-lib,_package.torchdata + - tags: get,generic-python-lib,_package.torch-geometric + - tags: get,generic-python-lib,_package.torch-scatter + - tags: get,generic-python-lib,_package.torch-sparse + - tags: get,generic-python-lib,_package.pybind11 + - tags: get,generic-python-lib,_package.PyYAML + - tags: get,generic-python-lib,_package.pydantic + - tags: get,generic-python-lib,_package.igb,_url.git+https://github.com/IllinoisGraphBenchmark/IGB-Datasets.git + - tags: get,generic-python-lib,_package.dgl,_find_links_url.https://data.dgl.ai/wheels/torch-2.1/repo.html + enable_if_env: + CM_MLPERF_DEVICE: + - cpu + - tags: get,generic-python-lib,_package.dgl,_find_links_url.https://data.dgl.ai/wheels/torch-2.1/cu121/repo.html + enable_if_env: + CM_MLPERF_DEVICE: + - gpu + + # Target devices cpu: group: device diff --git a/script/app-mlperf-inference-mlcommons-python/customize.py b/script/app-mlperf-inference-mlcommons-python/customize.py index 477190c676..8cd17e7de5 100644 --- a/script/app-mlperf-inference-mlcommons-python/customize.py +++ b/script/app-mlperf-inference-mlcommons-python/customize.py @@ -467,6 +467,34 @@ def get_run_cmd_reference( cmd = cmd.replace("--count", "--count-queries") env['OUTPUT_DIR'] = env['CM_MLPERF_OUTPUT_DIR'] + elif "rgat" in env['CM_MODEL']: + env['RUN_DIR'] = os.path.join( + env['CM_MLPERF_INFERENCE_SOURCE'], + "graph", + "R-GAT") + backend = env['CM_MLPERF_BACKEND'] + + dtype_rgat = env['CM_MLPERF_MODEL_PRECISION'].replace("float", "fp") + + if 
env.get('CM_MLPERF_SUBMISSION_GENERATION_STYLE', '') == "full": + mode_extra_options += " --dataset igbh-dgl --profile rgat-dgl-full " + else: + mode_extra_options += " --dataset igbh-dgl-tiny --profile debug-dgl " + + device = env['CM_MLPERF_DEVICE'] if env['CM_MLPERF_DEVICE'] != "gpu" else "cuda" + # have to add the condition for running in debug mode or real run mode + cmd = env['CM_PYTHON_BIN_WITH_PATH'] + " main.py " \ + " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + \ + " --dataset-path " + env['CM_IGBH_DATASET_PATH'] + \ + " --device " + device.replace("cuda", "cuda:0") + \ + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \ + scenario_extra_options + mode_extra_options + \ + " --output " + env['CM_MLPERF_OUTPUT_DIR'] + \ + ' --dtype ' + dtype_rgat + \ + " --model-path " + env['RGAT_CHECKPOINT_PATH'] + \ + " --mlperf_conf " + \ + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "mlperf.conf") + if env.get('CM_NETWORK_LOADGEN', '') in ["lon", "sut"]: cmd = cmd + " " + "--network " + env['CM_NETWORK_LOADGEN'] if env.get('CM_NETWORK_LOADGEN_SUT_SERVERS', []): diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml index 70d656bbaa..6e95a00827 100644 --- a/script/app-mlperf-inference/_cm.yaml +++ b/script/app-mlperf-inference/_cm.yaml @@ -254,6 +254,10 @@ variations: default_variations: backend: pytorch + reference,rgat: + default_variations: + backend: pytorch + reference,sdxl_: default_variations: backend: pytorch @@ -399,7 +403,7 @@ variations: nvidia-original: docker: interactive: True - extra_run_args: ' --runtime=nvidia --ulimit memlock=-1 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' + extra_run_args: ' --ulimit memlock=-1 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v3.1-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-l4-public os: "ubuntu" os_version: 
"20.04" @@ -411,12 +415,21 @@ variations: skip_if_env: CM_SKIP_GET_NVIDIA_DOCKER: - yes - mounts: - "${{ CM_CUDNN_TAR_FILE_PATH }}:${{ CM_CUDNN_TAR_FILE_PATH }}" - "${{ CM_TENSORRT_TAR_FILE_PATH }}:${{ CM_TENSORRT_TAR_FILE_PATH }}" - "${{ CUDA_RUN_FILE_LOCAL_PATH }}:${{ CUDA_RUN_FILE_LOCAL_PATH }}" - "${{ MLPERF_SCRATCH_PATH }}:${{ MLPERF_SCRATCH_PATH }}" + + update_meta_if_env: + - enable_if_env: + CM_HOST_OS_FLAVOR: + - ubuntu + CM_HOST_OS_VERSION: + - 20.04 + docker: + extra_run_args: ' --runtime=nvidia --ulimit memlock=-1 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' + default_variations: backend: tensorrt device: cuda @@ -743,6 +756,16 @@ variations: - 'yes' tags: get,dataset,kits19,preprocessed + rgat: + group: + model + add_deps_recursive: + mlperf-inference-implementation: + tags: _rgat + env: + CM_MODEL: + rgat + sdxl: group: model diff --git a/script/benchmark-program-mlperf/_cm.yaml b/script/benchmark-program-mlperf/_cm.yaml index 4ef6d77047..ed532f8bcb 100644 --- a/script/benchmark-program-mlperf/_cm.yaml +++ b/script/benchmark-program-mlperf/_cm.yaml @@ -18,6 +18,7 @@ variations: power: env: CM_MLPERF_POWER: 'yes' + CM_SAVE_CONSOLE_LOG: 'no' group: power-mode new_env_keys: - CM_MLPERF_* diff --git a/script/benchmark-program-mlperf/customize.py b/script/benchmark-program-mlperf/customize.py index 4ac5b9e213..23b4db3247 100644 --- a/script/benchmark-program-mlperf/customize.py +++ b/script/benchmark-program-mlperf/customize.py @@ -30,12 +30,15 @@ def postprocess(i): CM_MLPERF_RUN_COUNT=\$(cat \${CM_RUN_DIR}/count.txt); echo \${CM_MLPERF_RUN_COUNT}; CM_MLPERF_RUN_COUNT=\$((CM_MLPERF_RUN_COUNT+1)); -echo \${CM_MLPERF_RUN_COUNT} > \${CM_RUN_DIR}/count.txt && +echo \${CM_MLPERF_RUN_COUNT} > \${CM_RUN_DIR}/count.txt; + if [ \${CM_MLPERF_RUN_COUNT} -eq 1 ]; then export CM_MLPERF_USER_CONF="${CM_MLPERF_RANGING_USER_CONF}"; else export CM_MLPERF_USER_CONF="${CM_MLPERF_TESTING_USER_CONF}"; -fi && +fi 
+; + """ + env.get('CM_RUN_CMD', '').strip() else: env['CM_MLPERF_RUN_CMD'] = r""" diff --git a/script/get-cudnn/customize.py b/script/get-cudnn/customize.py index d4f3f53a6d..7d6984cbec 100644 --- a/script/get-cudnn/customize.py +++ b/script/get-cudnn/customize.py @@ -69,6 +69,7 @@ def preprocess(i): if cm_tmp_path != '': cm_tmp_path += ':' cm_tmp_path += '/usr/local/cuda/lib64:/usr/cuda/lib64:/usr/local/cuda/lib:/usr/cuda/lib:/usr/local/cuda-11/lib64:/usr/cuda-11/lib:/usr/local/cuda-12/lib:/usr/cuda-12/lib:/usr/local/packages/cuda/lib' + cm_tmp_path += os.path.expandvars(':$CUDNN_ROOT/lib') env['CM_TMP_PATH'] = cm_tmp_path env['CM_TMP_PATH_IGNORE_NON_EXISTANT'] = 'yes' diff --git a/script/get-dataset-mlperf-inference-gnn/_cm.yaml b/script/get-dataset-mlperf-inference-gnn/_cm.yaml deleted file mode 100644 index d56bfa9806..0000000000 --- a/script/get-dataset-mlperf-inference-gnn/_cm.yaml +++ /dev/null @@ -1,46 +0,0 @@ -alias: get-dataset-mlperf-inference-gnn -automation_alias: script -automation_uid: 5b4e0237da074764 -cache: true -tags: -- get -- dataset -- mlperf -- gnn -- icbh -- inference -uid: 824e61316c074253 -# new_env_keys: -input_mapping: - out_path: CM_IGBH_DATASET_OUT_PATH -deps: - - tags: mlperf,inference,source - names: - - inference-src - - tags: get,python - names: - - get-python -variations: - debug: - default: true - group: dataset-type - env: - CM_IGBH_DATASET_TYPE: debug - CM_IGBH_DATASET_SIZE: tiny - full: - group: dataset-type - env: - CM_IGBH_DATASET_TYPE: full - CM_IGBH_DATASET_SIZE: full - glt: - env: - CM_IGBH_GRAPH_COMPRESS: yes - csc: - group: compressed-layout - default: true - env: - CM_IGBH_GRAPH_COMPRESS_LAYOUT: csc - csr: - group: compressed-layout - env: - CM_IGBH_GRAPH_COMPRESS_LAYOUT: csr diff --git a/script/get-dataset-mlperf-inference-igbh/_cm.yaml b/script/get-dataset-mlperf-inference-igbh/_cm.yaml new file mode 100644 index 0000000000..2870c5a79d --- /dev/null +++ b/script/get-dataset-mlperf-inference-igbh/_cm.yaml @@ -0,0 
+1,360 @@ +alias: get-dataset-mlperf-inference-igbh +automation_alias: script +automation_uid: 5b4e0237da074764 +cache: true +tags: +- get +- dataset +- mlperf +- rgat +- igbh +- inference +uid: 824e61316c074253 +new_env_keys: + - CM_IGBH_DATASET_PATH +input_mapping: + out_path: CM_IGBH_DATASET_OUT_PATH +deps: + - tags: mlperf,inference,source + names: + - inference-src + - tags: get,python + names: + - get-python +prehook_deps: + #paper + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper/node_feat.npy + CM_DOWNLOAD_CHECKSUM: 71058b9ac8011bafa1c5467504452d13 + CM_DOWNLOAD_FILENAME: node_feat.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ + extra_cache_tags: dataset,igbh,paper,node_feat + force_cache: true + enable_if_env: + CM_IGBH_DATASET_TYPE: full + names: + - dae + tags: download-and-extract + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper/node_label_19.npy + CM_DOWNLOAD_CHECKSUM: be6fda45566e679bdb05ebea98ad16d4 + CM_DOWNLOAD_FILENAME: node_label_19.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ + extra_cache_tags: dataset,igbh,paper,node_label_19 + force_cache: true + enable_if_env: + CM_IGBH_DATASET_TYPE: full + names: + - dae + tags: download-and-extract + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper/node_label_2K.npy + CM_DOWNLOAD_CHECKSUM: 6eccab9a14f92f42be5b367c39002031 + CM_DOWNLOAD_FILENAME: node_label_2K.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ + extra_cache_tags: dataset,igbh,paper,node_label_2K + force_cache: true + enable_if_env: + CM_IGBH_DATASET_TYPE: full + names: + - dae + tags: download-and-extract + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + - env: + CM_PACKAGE_URL: 
https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper/paper_id_index_mapping.npy + CM_DOWNLOAD_CHECKSUM: f70dd642a4f7e41d926c91c8c054fc4c + CM_DOWNLOAD_FILENAME: paper_id_index_mapping.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ + extra_cache_tags: dataset,igbh,paper,paper_id_index_mapping + force_cache: true + enable_if_env: + CM_IGBH_DATASET_TYPE: full + names: + - dae + tags: download-and-extract + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + #paper_cites_paper + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper__cites__paper/edge_index.npy + CM_DOWNLOAD_CHECKSUM: f4897f53636c04a9c66f6063ec635c16 + CM_DOWNLOAD_FILENAME: edge_index.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/paper__cites__paper/ + extra_cache_tags: dataset,igbh,paper_cites_paper,edge_index + force_cache: true + enable_if_env: + CM_IGBH_DATASET_TYPE: full + names: + - dae + tags: download-and-extract + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + # author + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/author/author_id_index_mapping.npy + CM_DOWNLOAD_CHECKSUM: 58c15aab7dae03bbd57e6a4ac5e61bd9 + CM_DOWNLOAD_FILENAME: author_id_index_mapping.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/author/ + extra_cache_tags: dataset,igbh,author,author_id_index_mapping + force_cache: true + enable_if_env: + CM_IGBH_DATASET_TYPE: full + names: + - dae + tags: download-and-extract + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/author/node_feat.npy + CM_DOWNLOAD_CHECKSUM: 2ec2512b554088381c04ec013e893c8d + CM_DOWNLOAD_FILENAME: node_feat.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/author/ + extra_cache_tags: dataset,igbh,author,node_feat + force_cache: true + enable_if_env: + CM_IGBH_DATASET_TYPE: full + names: + - dae + tags: download-and-extract + 
update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + # conference + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/conference/conference_id_index_mapping.npy + CM_DOWNLOAD_CHECKSUM: 0bf7c555d8c697b31b6af6c4cb6b6612 + CM_DOWNLOAD_FILENAME: conference_id_index_mapping.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/conference/ + extra_cache_tags: dataset,igbh,conference,conference_id_index_mapping + force_cache: true + enable_if_env: + CM_IGBH_DATASET_TYPE: full + names: + - dae + tags: download-and-extract + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/conference/node_feat.npy + CM_DOWNLOAD_CHECKSUM: 898ff529b8cf972261fedd50df6377f8 + CM_DOWNLOAD_FILENAME: node_feat.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/conference/ + extra_cache_tags: dataset,igbh,conference,node_feat + force_cache: true + enable_if_env: + CM_IGBH_DATASET_TYPE: full + names: + - dae + tags: download-and-extract + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + # institute + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/institute/institute_id_index_mapping.npy + CM_DOWNLOAD_CHECKSUM: 03fb45eafb7bd35875ef4c7cd2a299a9 + CM_DOWNLOAD_FILENAME: institute_id_index_mapping.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/institute/ + extra_cache_tags: dataset,igbh,institute,institute_id_index_mapping + force_cache: true + enable_if_env: + CM_IGBH_DATASET_TYPE: full + names: + - dae + tags: download-and-extract + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/institute/node_feat.npy + CM_DOWNLOAD_CHECKSUM: 12eaeced22d17b4e97d4b4742331c819 + CM_DOWNLOAD_FILENAME: node_feat.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/institute/ + extra_cache_tags: dataset,igbh,institute,node_feat + force_cache: true + 
enable_if_env: + CM_IGBH_DATASET_TYPE: full + names: + - dae + tags: download-and-extract + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + # journal + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/journal/journal_id_index_mapping.npy + CM_DOWNLOAD_CHECKSUM: b630c20852b76d17a5c9c37b39176f69 + CM_DOWNLOAD_FILENAME: journal_id_index_mapping.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/journal/ + extra_cache_tags: dataset,igbh,journal,journal_id_index_mapping + force_cache: true + enable_if_env: + CM_IGBH_DATASET_TYPE: full + names: + - dae + tags: download-and-extract + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/journal/node_feat.npy + CM_DOWNLOAD_CHECKSUM: 49d51b554b3004f10bee19d1c7f9b416 + CM_DOWNLOAD_FILENAME: node_feat.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/journal/ + extra_cache_tags: dataset,igbh,journal,node_feat + force_cache: true + enable_if_env: + CM_IGBH_DATASET_TYPE: full + names: + - dae + tags: download-and-extract + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + # fos + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/fos/fos_id_index_mapping.npy + CM_DOWNLOAD_CHECKSUM: 0f0cfde619361cde35d3be9f201d081a + CM_DOWNLOAD_FILENAME: fos_id_index_mapping.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/fos/ + extra_cache_tags: dataset,igbh,fos,fos_id_index_mapping + force_cache: true + enable_if_env: + CM_IGBH_DATASET_TYPE: full + names: + - dae + tags: download-and-extract + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/fos/node_feat.npy + CM_DOWNLOAD_CHECKSUM: 3ef3df19e2475c387fec10bac82773df + CM_DOWNLOAD_FILENAME: node_feat.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/fos/ + extra_cache_tags: dataset,igbh,fos,node_feat + force_cache: true + 
enable_if_env: + CM_IGBH_DATASET_TYPE: full + names: + - dae + tags: download-and-extract + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + # author__affiliated_to__institute + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/author__affiliated_to__institute/edge_index.npy + CM_DOWNLOAD_CHECKSUM: e35dba208f81e0987207f78787c75711 + CM_DOWNLOAD_FILENAME: edge_index.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/author__affiliated_to__institute/ + extra_cache_tags: dataset,igbh,author_affiliated_to_institute,edge_index + force_cache: true + enable_if_env: + CM_IGBH_DATASET_TYPE: full + names: + - dae + tags: download-and-extract + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + # paper__published__journal + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper__published__journal/edge_index.npy + CM_DOWNLOAD_CHECKSUM: 38505e83bde8e5cf94ae0a85afa60e13 + CM_DOWNLOAD_FILENAME: edge_index.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/paper__published__journal/ + extra_cache_tags: dataset,igbh,paper_published_journal,edge_index + force_cache: true + enable_if_env: + CM_IGBH_DATASET_TYPE: full + names: + - dae + tags: download-and-extract + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + # paper__topic__fos + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper__topic__fos/edge_index.npy + CM_DOWNLOAD_CHECKSUM: 427fb350a248ee6eaa8c21cde942fda4 + CM_DOWNLOAD_FILENAME: edge_index.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/paper__topic__fos/ + extra_cache_tags: dataset,igbh,paper_topic_fos,edge_index + force_cache: true + enable_if_env: + CM_IGBH_DATASET_TYPE: full + names: + - dae + tags: download-and-extract + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + # paper__venue__conference + - env: + CM_PACKAGE_URL: 
https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper__venue__conference/edge_index.npy + CM_DOWNLOAD_CHECKSUM: 541b8d43cd93579305cfb71961e10a7d + CM_DOWNLOAD_FILENAME: edge_index.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/paper__venue__conference/ + extra_cache_tags: dataset,igbh,paper_venue_conference,edge_index + force_cache: true + enable_if_env: + CM_IGBH_DATASET_TYPE: full + names: + - dae + tags: download-and-extract + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + # paper__written_by__author + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper__written_by__author/edge_index.npy + CM_DOWNLOAD_CHECKSUM: df39fe44bbcec93a640400e6d81ffcb5 + CM_DOWNLOAD_FILENAME: edge_index.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/paper__written_by__author/ + extra_cache_tags: dataset,igbh,paper_written_by_author,edge_index + force_cache: true + enable_if_env: + CM_IGBH_DATASET_TYPE: full + names: + - dae + tags: download-and-extract + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL +variations: + debug: + default: true + group: dataset-type + env: + CM_IGBH_DATASET_TYPE: debug + CM_IGBH_DATASET_SIZE: tiny + full: + group: dataset-type + env: + CM_IGBH_DATASET_TYPE: full + CM_IGBH_DATASET_SIZE: full + glt: + env: + CM_IGBH_GRAPH_COMPRESS: yes + csc: + group: compressed-layout + default: true + env: + CM_IGBH_GRAPH_COMPRESS_LAYOUT: csc + csr: + group: compressed-layout + env: + CM_IGBH_GRAPH_COMPRESS_LAYOUT: csr diff --git a/script/get-dataset-mlperf-inference-gnn/customize.py b/script/get-dataset-mlperf-inference-igbh/customize.py similarity index 94% rename from script/get-dataset-mlperf-inference-gnn/customize.py rename to script/get-dataset-mlperf-inference-igbh/customize.py index 33105ffef2..9d4240209a 100644 --- a/script/get-dataset-mlperf-inference-gnn/customize.py +++ b/script/get-dataset-mlperf-inference-igbh/customize.py @@ -21,15 +21,15 @@ def preprocess(i): download_loc = 
env.get('CM_IGBH_DATASET_OUT_PATH', os.getcwd()) + env['CM_IGBH_DATASET_DOWNLOAD_LOCATION'] = download_loc + run_cmd += f"cd {graph_folder} " x_sep = " && " # download the model if env['CM_IGBH_DATASET_TYPE'] == "debug": run_cmd += x_sep + env['CM_PYTHON_BIN_WITH_PATH'] + \ - f" tools/download_igbh_test.py --target-path {download_loc}" - else: - run_cmd += x_sep + f"./tools/download_igbh_full.sh {download_loc}" + f" tools/download_igbh_test.py --target-path {download_loc} " # split seeds run_cmd += x_sep + \ diff --git a/script/get-dataset-mlperf-inference-gnn/run.sh b/script/get-dataset-mlperf-inference-igbh/run.sh similarity index 100% rename from script/get-dataset-mlperf-inference-gnn/run.sh rename to script/get-dataset-mlperf-inference-igbh/run.sh diff --git a/script/get-docker/_cm.yaml b/script/get-docker/_cm.yaml index 8849f89078..e2f33e875b 100644 --- a/script/get-docker/_cm.yaml +++ b/script/get-docker/_cm.yaml @@ -8,7 +8,9 @@ deps: docker_input_mapping: {} input_description: {} input_mapping: {} -new_env_keys: [] +new_env_keys: [ + "CM_DOCKER_VERSION" +] new_state_keys: [] post_deps: [] posthook_deps: [] diff --git a/script/get-docker/customize.py b/script/get-docker/customize.py index a3768abcaf..0d41346b45 100644 --- a/script/get-docker/customize.py +++ b/script/get-docker/customize.py @@ -68,4 +68,6 @@ def postprocess(i): env['CM_DOCKER_CACHE_TAGS'] = 'version-' + version + env['CM_DOCKER_VERSION'] = version + return {'return': 0, 'version': version} diff --git a/script/get-generic-python-lib/_cm.yaml b/script/get-generic-python-lib/_cm.yaml index 116141bd59..ee0a4cdd1f 100644 --- a/script/get-generic-python-lib/_cm.yaml +++ b/script/get-generic-python-lib/_cm.yaml @@ -14,6 +14,7 @@ deps: CM_TMP_USE_CUSTOM_PYTHON: - 'on' tags: get,python3 + dynamic: true - names: - python-pip - pip @@ -347,6 +348,9 @@ variations: CM_GENERIC_PYTHON_PACKAGE_NAME: '#' CM_GENERIC_PYTHON_PIP_UNINSTALL_DEPS: '' CM_GENERIC_PYTHON_PIP_URL: '' + find_links_url.#: + env: + 
CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: '#' package.torch,cxx11-abi: env: CM_GENERIC_PYTHON_PIP_INDEX_URL: https://download.pytorch.org/whl/nightly/cpu-cxx11-abi diff --git a/script/get-generic-python-lib/customize.py b/script/get-generic-python-lib/customize.py index 4b837a79d7..6421a22a11 100644 --- a/script/get-generic-python-lib/customize.py +++ b/script/get-generic-python-lib/customize.py @@ -104,6 +104,13 @@ def preprocess(i): extra += ' --extra-index-url ' + extra_index_url + # check find-links + find_links_url = env.get( + 'CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL', '').strip() + + if find_links_url != '': + extra += ' -f ' + find_links_url + # Check update if env.get('CM_GENERIC_PYTHON_PIP_UPDATE', '') in [ True, 'true', 'yes', 'on']: @@ -128,8 +135,11 @@ def detect_version(i): env = i['env'] - env_version_key = 'CM_' + \ - env['CM_TMP_PYTHON_PACKAGE_NAME_ENV'].upper() + '_VERSION' + if env.get('CM_TMP_PYTHON_PACKAGE_NAME_ENV', '') != '': + env_version_key = 'CM_' + \ + env['CM_TMP_PYTHON_PACKAGE_NAME_ENV'].upper() + '_VERSION' + else: + env_version_key = 'CM_CACHE_TMP_VERSION' r = i['automation'].parse_version({'match_text': r'\s*([\d.a-z\-]+)', 'group_number': 1, diff --git a/script/get-generic-python-lib/validate_cache.bat b/script/get-generic-python-lib/validate_cache.bat new file mode 100644 index 0000000000..2612377c89 --- /dev/null +++ b/script/get-generic-python-lib/validate_cache.bat @@ -0,0 +1,4 @@ +IF NOT DEFINED CM_TMP_CURRENT_SCRIPT_PATH SET CM_TMP_CURRENT_SCRIPT_PATH=%CD% + +%CM_PYTHON_BIN_WITH_PATH% %CM_TMP_CURRENT_SCRIPT_PATH%\detect-version.py +IF %ERRORLEVEL% NEQ 0 EXIT 1 diff --git a/script/get-generic-python-lib/validate_cache.sh b/script/get-generic-python-lib/validate_cache.sh new file mode 100644 index 0000000000..b60ac0814c --- /dev/null +++ b/script/get-generic-python-lib/validate_cache.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +CM_TMP_CURRENT_SCRIPT_PATH=${CM_TMP_CURRENT_SCRIPT_PATH:-$PWD} + +${CM_PYTHON_BIN_WITH_PATH} 
${CM_TMP_CURRENT_SCRIPT_PATH}/detect-version.py +test $? -eq 0 || exit $? +exit 0 diff --git a/script/get-generic-sys-util/customize.py b/script/get-generic-sys-util/customize.py index 794a529b96..18f85a7503 100644 --- a/script/get-generic-sys-util/customize.py +++ b/script/get-generic-sys-util/customize.py @@ -17,7 +17,10 @@ def preprocess(i): env['CM_SYS_UTIL_CHECK_CMD'] = env['CM_SYS_UTIL_VERSION_CMD'] if env.get('CM_GENERIC_SYS_UTIL_RUN_MODE', '') == "install": - i['run_script_input']['script_name'] = "install" + if env.get('CM_SYS_UTIL_INSTALL_WITH_RETRY', '') == "yes": + i['run_script_input']['script_name'] = "install-with-retry" + else: + i['run_script_input']['script_name'] = "install" if env.get('CM_GENERIC_SYS_UTIL_RUN_MODE', '') == "detect": if env.get('CM_SYS_UTIL_VERSION_CMD', '') != '' or env.get( diff --git a/script/get-generic-sys-util/install-with-retry.sh b/script/get-generic-sys-util/install-with-retry.sh new file mode 100644 index 0000000000..9abc55d085 --- /dev/null +++ b/script/get-generic-sys-util/install-with-retry.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# Safe execution of a command stored in a variable +cmd="${CM_SYS_UTIL_INSTALL_CMD}" +echo "$cmd" + +# set the max number of retries as well as the delay between the retries +max_retries=3 +delay_in_retry=3 + + +for ((i=1; i<=max_retries; i++)); do + echo "Attempting to install ${CM_SYS_UTIL_NAME} - $i of $max_retries..." + output=$(eval "$cmd" 2>&1) + exit_status=$? # must be read before echo, which resets $? to 0 + echo "$output" + + if [[ $exit_status -ne 0 || "$output" == *"Temporary failure resolving"* || "$output" == *"Unable to fetch some archives"* ]]; then + # Check for network-related errors in the output + if echo "$output" | grep -q -E "Could not resolve|Temporary failure resolving"; then + echo "Network issue detected, retrying in $delay_in_retry seconds..." 
+ sleep $delay_in_retry + else + # If it's a non-network error, handle based on fail-safe setting + if [[ "${CM_TMP_FAIL_SAFE}" == 'yes' ]]; then + echo "CM_GET_GENERIC_SYS_UTIL_INSTALL_FAILED=yes" > tmp-run-env.out + echo "Fail-safe is enabled, exiting with status 0." + exit 0 + else + echo "Fail-safe is not enabled, exiting with error status $exit_status." + exit $exit_status + fi + fi + else + # If the command succeeded + echo "Successfully installed ${CM_SYS_UTIL_NAME}." + exit 0 + fi + + # If this was the last retry, print a final failure message + if [[ $i -eq $max_retries ]]; then + echo "Installation failed after $max_retries attempts due to persistent network issues." + if [[ "${CM_TMP_FAIL_SAFE}" == 'yes' ]]; then + exit 0 + else + exit 1 + fi + fi +done diff --git a/script/get-generic-sys-util/install.sh b/script/get-generic-sys-util/install.sh index 145d0b7451..c8f532c49a 100644 --- a/script/get-generic-sys-util/install.sh +++ b/script/get-generic-sys-util/install.sh @@ -18,4 +18,4 @@ if ! 
eval "$cmd"; then else #echo "Command succeeded" exit 0 -fi +fi \ No newline at end of file diff --git a/script/get-ml-model-rgat/_cm.yaml b/script/get-ml-model-rgat/_cm.yaml index 1aa9f3c1f8..0bc4b1eab1 100644 --- a/script/get-ml-model-rgat/_cm.yaml +++ b/script/get-ml-model-rgat/_cm.yaml @@ -38,6 +38,8 @@ tags: - rgat uid: b409fd66c5ad4ed5 variations: + pytorch: + default: true fp32: default: true env: diff --git a/script/get-ml-model-rgat/customize.py b/script/get-ml-model-rgat/customize.py index 3f2c6c0af6..2fc39c59d2 100644 --- a/script/get-ml-model-rgat/customize.py +++ b/script/get-ml-model-rgat/customize.py @@ -20,7 +20,8 @@ def postprocess(i): env = i['env'] if env.get('RGAT_CHECKPOINT_PATH', '') == '': - env['RGAT_CHECKPOINT_PATH'] = env['CM_ML_MODEL_PATH'] + env['RGAT_CHECKPOINT_PATH'] = os.path.join( + env['CM_ML_MODEL_PATH'], "RGAT.pt") elif env.get('CM_ML_MODEL_PATH', '') == '': env['CM_ML_MODEL_PATH'] = env['RGAT_CHECKPOINT_PATH'] diff --git a/script/get-mlperf-inference-sut-description/customize.py b/script/get-mlperf-inference-sut-description/customize.py index 6145d96cbd..0f7693fa0e 100644 --- a/script/get-mlperf-inference-sut-description/customize.py +++ b/script/get-mlperf-inference-sut-description/customize.py @@ -88,6 +88,13 @@ def preprocess(i): state['CM_SUT_META']['other_software_stack'] = "Python: " + \ python_version + ", " + compiler + "-" + compiler_version + if env.get('CM_DOCKER_VERSION', '') != '': + state['CM_SUT_META']['other_software_stack'] += " Docker version:" + \ + env['CM_DOCKER_VERSION'] + else: + if os.path.exists('/.dockerenv'): + state['CM_SUT_META']['other_software_stack'] += ", Using Docker " + if state['CM_SUT_META'].get('system_name', '') == '': system_name = env.get('CM_MLPERF_SYSTEM_NAME') if not system_name: diff --git a/script/get-spec-ptd/customize.py b/script/get-spec-ptd/customize.py index b4c949179d..6e88200dd5 100644 --- a/script/get-spec-ptd/customize.py +++ b/script/get-spec-ptd/customize.py @@ -1,6 +1,7 @@ 
from cmind import utils import os import shutil +import stat def preprocess(i): @@ -19,9 +20,18 @@ def postprocess(i): binary_name = "ptd-windows-x86.exe" else: binary_name = "ptd-linux-x86" - if 'CM_MLPERF_PTD_PATH' not in env: + if env.get('CM_MLPERF_PTD_PATH', '') == '': env['CM_MLPERF_PTD_PATH'] = os.path.join( - env['CM_MLPERF_POWER_SOURCE'], 'inference_v1.0', binary_name) + env['CM_MLPERF_POWER_SOURCE'], 'PTD', 'binaries', binary_name) + + file_path = env['CM_MLPERF_PTD_PATH'] + current_permissions = os.stat(file_path).st_mode + + # Check if the file already has execute permissions + if not (current_permissions & stat.S_IXUSR): # Check user execute permission + # Add execute permissions for the user + os.chmod(file_path, current_permissions | stat.S_IXUSR) + env['CM_SPEC_PTD_PATH'] = env['CM_MLPERF_PTD_PATH'] return {'return': 0} diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml index 5d00eee0e9..cf390bc3ab 100644 --- a/script/run-mlperf-inference-app/_cm.yaml +++ b/script/run-mlperf-inference-app/_cm.yaml @@ -117,6 +117,7 @@ input_mapping: criteo_day23_raw_data_path: CM_CRITEO_DAY23_RAW_DATA_PATH use_dataset_from_host: CM_USE_DATASET_FROM_HOST use_model_from_host: CM_USE_MODEL_FROM_HOST + rgat_checkpoint_path: RGAT_CHECKPOINT_PATH new_state_keys: - app_mlperf_inference_* @@ -233,6 +234,8 @@ variations: tags: _full coco2014-dataset: tags: _full + igbh-dataset: + tags: _full env: CM_MLPERF_SUBMISSION_GENERATION_STYLE: full group: submission-generation-style @@ -445,6 +448,7 @@ input_description: - mixtral-8x7b - mobilenet - efficientnet + - rgat default: resnet50 desc: MLPerf model sort: 200 diff --git a/script/run-mlperf-inference-app/customize.py b/script/run-mlperf-inference-app/customize.py index 67f3493448..9b5f350d4b 100644 --- a/script/run-mlperf-inference-app/customize.py +++ b/script/run-mlperf-inference-app/customize.py @@ -5,7 +5,7 @@ import subprocess import cmind as cm import copy -from tabulate 
import tabulate +import mlperf_utils summary_ext = ['.csv', '.json', '.xlsx'] @@ -325,7 +325,7 @@ def preprocess(i): # Better to do this in a stand alone CM script with proper deps but # currently we manage this by modifying the sys path of the python # executing CM - import mlperf_utils + from tabulate import tabulate # noqa print(sut) result_table, headers = mlperf_utils.get_result_table( diff --git a/script/run-mlperf-power-server/_cm.yaml b/script/run-mlperf-power-server/_cm.yaml index 18f21af75b..c4c4546b15 100644 --- a/script/run-mlperf-power-server/_cm.yaml +++ b/script/run-mlperf-power-server/_cm.yaml @@ -43,7 +43,9 @@ input_mapping: device_type: CM_MLPERF_POWER_DEVICE_TYPE interface_flag: CM_MLPERF_POWER_INTERFACE_FLAG ntp_server: CM_MLPERF_POWER_NTP_SERVER + conf_file: CM_MLPERF_POWER_SERVER_CONF_FILE screen: CM_MLPERF_POWER_SERVER_USE_SCREEN + num_analyzers: CM_MLPERF_POWER_NUM_ANALYZERS tags: - run - mlc diff --git a/script/run-mlperf-power-server/customize.py b/script/run-mlperf-power-server/customize.py index ea989bb401..bcdbe542ef 100644 --- a/script/run-mlperf-power-server/customize.py +++ b/script/run-mlperf-power-server/customize.py @@ -8,21 +8,65 @@ def preprocess(i): os_info = i['os_info'] env = i['env'] + + # Initialize ConfigParser config = configparser.ConfigParser() - server_config_file = os.path.join( - env['CM_MLPERF_POWER_SOURCE'], - 'ptd_client_server', - 'server.template.conf') + + if env.get('CM_MLPERF_POWER_SERVER_CONF_FILE', '') != '': + server_config_file = env['CM_MLPERF_POWER_SERVER_CONF_FILE'] + else: + server_config_file = os.path.join( + env.get('CM_MLPERF_POWER_SOURCE', ''), + 'ptd_client_server', + 'server.template.conf' + ) + + # Read the configuration file with error handling + if not os.path.exists(server_config_file): + raise FileNotFoundError( + f"Server config file not found: {server_config_file}") + config.read(server_config_file) - config['server']['ntpServer'] = env['CM_MLPERF_POWER_NTP_SERVER'] - 
config['server']['listen'] = env['CM_MLPERF_POWER_SERVER_ADDRESS'] + \ - " " + env['CM_MLPERF_POWER_SERVER_PORT'] - config['ptd']['ptd'] = env['CM_MLPERF_PTD_PATH'] - config['ptd']['interfaceFlag'] = env['CM_MLPERF_POWER_INTERFACE_FLAG'] - config['ptd']['deviceType'] = env['CM_MLPERF_POWER_DEVICE_TYPE'] - config['ptd']['devicePort'] = env['CM_MLPERF_POWER_DEVICE_PORT'] - with open('power-server.conf', 'w') as configfile: + # Update the server section + try: + config['server']['ntpServer'] = env['CM_MLPERF_POWER_NTP_SERVER'] + config['server']['listen'] = f"{env['CM_MLPERF_POWER_SERVER_ADDRESS']} {env['CM_MLPERF_POWER_SERVER_PORT']}" + except KeyError as e: + raise KeyError(f"Missing required environment variable: {e}") + + # Define number of analyzers and network port start + num_analyzers = int(env.get('CM_MLPERF_POWER_NUM_ANALYZERS', 1)) + network_port_start = int( + env.get( + 'CM_MLPERF_POWER_NETWORK_PORT_START', + 8888)) + + # Ensure 'ptd' section exists + if 'ptd' not in config: + config.add_section('ptd') + + config['ptd']['ptd'] = str(env.get('CM_MLPERF_PTD_PATH', '')) + config['ptd']['analyzercount'] = str(num_analyzers) + + # Add analyzers to the configuration + for aid in range(1, num_analyzers + 1): + analyzer_section = f'analyzer{aid}' + if analyzer_section not in config: + config.add_section(analyzer_section) + + # Add the analyzer subsection as keys under the 'ptd' section + config[f'{analyzer_section}']['interfaceFlag'] = str( + env.get('CM_MLPERF_POWER_INTERFACE_FLAG', '')) + config[f'{analyzer_section}']['deviceType'] = str( + env.get('CM_MLPERF_POWER_DEVICE_TYPE', '')) + config[f'{analyzer_section}']['devicePort'] = str( + env.get('CM_MLPERF_POWER_DEVICE_PORT', '')) + config[f'{analyzer_section}']['networkPort'] = str( + network_port_start + aid - 1) + + with open('tmp-power-server.conf', 'w') as configfile: config.write(configfile) + print({section: dict(config[section]) for section in config.sections()}) if env['CM_HOST_OS_TYPE'] == "windows": 
@@ -33,7 +77,7 @@ def preprocess(i): cmd = env['CM_PYTHON_BIN_WITH_PATH'] + ' ' + os.path.join( env['CM_MLPERF_POWER_SOURCE'], 'ptd_client_server', - 'server.py') + ' -c power-server.conf' + 'server.py') + ' -c tmp-power-server.conf' if env.get('CM_MLPERF_POWER_SERVER_USE_SCREEN', 'no') == 'yes': cmd = cmd_prefix + ' screen -d -m ' + cmd + ' ' else: diff --git a/script/test-cm-core/src/script/test_features.py b/script/test-cm-core/src/script/test_features.py index cde5e3ed21..fa8ccb7060 100644 --- a/script/test-cm-core/src/script/test_features.py +++ b/script/test-cm-core/src/script/test_features.py @@ -29,3 +29,27 @@ r = cm.access({'action': 'search', 'automation': 'cache', 'tags': 'get,dataset,preprocessed,imagenet,-_NHWC'}) checks.check_list(r, "_NHWC", False) + +r = cm.access({'action': 'run', + 'automation': 'script', + 'tags': 'get,generic-python-lib,_package.scipy', + 'version': '1.9.3', + 'quiet': 'yes'}) + +r = cm.access({'action': 'run', + 'automation': 'script', + 'tags': 'get,generic-python-lib,_package.scipy', + 'version': '1.9.2', + 'quiet': 'yes'}) + +r = cm.access({'action': 'run', + 'automation': 'script', + 'tags': 'get,generic-python-lib,_package.scipy', + 'version': '1.9.3', + 'only_execute_from_cache': True, + 'quiet': 'yes'}) + +# r should return error +if 'error' not in r: + print(r) + raise Exception('Invalidated cache entry for scipy==1.9.3 found in cache')