diff --git a/.github/workflows/test-cmx-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml b/.github/workflows/test-cmx-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml index 30ce1ad17..38fd81454 100644 --- a/.github/workflows/test-cmx-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml +++ b/.github/workflows/test-cmx-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml @@ -50,7 +50,7 @@ jobs: if: matrix.os != 'windows-latest' run: | cmx run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=bert-99 --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=5 --precision=${{ matrix.precision }} --target_qps=1 --v --quiet - - name: Push Results + - name: Push Test MLPerf Results (only for cTuning dev branches) if: github.repository_owner == 'ctuning' env: USER: "GitHub Action" diff --git a/.github/workflows/test-cmx-mlperf-inference-resnet50.yml b/.github/workflows/test-cmx-mlperf-inference-resnet50.yml index 7f0a9de9e..2b05eebd9 100644 --- a/.github/workflows/test-cmx-mlperf-inference-resnet50.yml +++ b/.github/workflows/test-cmx-mlperf-inference-resnet50.yml @@ -50,7 +50,7 @@ jobs: if: matrix.os != 'windows-latest' run: | cmx run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=resnet50 --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --target_qps=1 --v --quiet - - name: Push Results + - name: Push Test MLPerf Results (only for cTuning dev branches) if: github.repository_owner == 'ctuning' env: USER: "GitHub Action" diff --git a/.github/workflows/test-cmx-mlperf-inference-rgat.yml b/.github/workflows/test-cmx-mlperf-inference-rgat.yml index 150fb0c8b..bc6bd8070 100644 --- a/.github/workflows/test-cmx-mlperf-inference-rgat.yml +++ b/.github/workflows/test-cmx-mlperf-inference-rgat.yml @@ -35,7 +35,7 @@ jobs: - name: Test MLPerf Inference R-GAT using ${{ matrix.backend }} on ${{ matrix.os }} run: | cmx run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --adr.inference-src.tags=_branch.dev --pull_changes=yes --pull_inference_changes=yes --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=rgat --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --category=datacenter --quiet --v --target_qps=1 - - name: Push Results + - name: Push Test MLPerf Results (only for cTuning dev branches) if: github.repository_owner == 'ctuning' env: USER: "GitHub Action" diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 277e33d51..8108f5b80 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -13,78 +13,3 @@ MLCommons project work is tracked with issue trackers and pull requests. Modify the project in your own fork and issue a pull request once you want other developers to take a look at what you have done and discuss the proposed changes. Ensure that cla-bot and other checks pass for your Pull requests. - -Collective Knowledge (CK), Collective Mind (CM) and Common Metadata eXchange (CMX) -were created by [Grigori Fursin](https://arxiv.org/abs/2406.16791), -sponsored by cKnowledge.org and cTuning.org, and donated to MLCommons -to benefit everyone. 
Since then, this open-source automation technology -(CM/CMX, CM4MLOps/MLPerf automations, CM4ABTF, CM4Research, etc) is being extended -as a community effort thanks to all our volunteers, collaborators -and contributors listed here in alphabetical order: - -* @Henryfzh -* @Leonard226 -* @Oseltamivir -* @Submandarine -* Resmi Arjun -* Omar Benjelloun (Google) -* Alice Cheng (Nvidia) -* Jiahao Chen (MIT) -* Ramesh N Chukka (Intel) -* Ray DeMoss (One Stop Systems) -* Ryan T DeRue (Purdue University) -* Himanshu Dutta (Indian Institute of Technology) -* Nicolas Essayan -* Justin Faust (One Stop Systems) -* Diane Feddema (Red Hat) -* Leonid Fursin (United Silicon Carbide) -* Anirban Ghosh (Nvidia) -* James Goel (Qualcomm) -* Michael Goin (Neural Magic) -* Jose Armando Hernandez (Paris Saclay University) -* Mehrdad Hessar (OctoML) -* Miro Hodak (AMD) -* Sachin Idgunji (Nvidia) -* Tom Jablin (Google) -* Nino Jacob -* David Kanter (MLCommons) -* Alex Karargyris -* Jason Knight (OctoML) -* Ilya Kozulin (Deelvin) -* @makaveli10 (Collabora) -* Steve Leak(NERSC) -* Amija Maji (Purdue University) -* Peter Mattson (Google, MLCommons) -* Kasper Mecklenburg (Arm) -* Pablo Gonzalez Mesa -* Thierry Moreau (OctoML) -* Sachin Mudaliyar -* Stanley Mwangi (Microsoft) -* Ashwin Nanjappa (Nvidia) -* Hai Ah Nam (NERSC) -* Nandeeka Nayak (UIUC) -* Datta Nimmaturi (Nutanix) -* Lakshman Patel -* Arun Tejusve Raghunath Rajan (Cruise) -* Vijay Janapa Reddi (Harvard University) -* Andrew Reusch (OctoML) -* Anandhu Sooraj (Kerala Technical University) -* Sergey Serebryakov (HPE) -* Warren Schultz (Principled Technologies) -* Amrutha Sheleenderan (Kerala Technical University) -* Micah J Sheller (Intel) -* Byoungjun Seo (TTA) -* Aditya Kumar Shaw (Indian Institute of Science) -* Ilya Slavutin (Deelvin) -* Jinho Suh (Nvidia) -* Arjun Suresh -* Badhri Narayanan Suresh (Intel) -* David Tafur (MLCommons) -* Chloe Tessier -* Gaurav Verma (Stony Brook University) -* Zixian Wang -* Nathan Wasson -* Scott Wasson (MLCommons) -* Haoyang Zhang (UIUC) -* Bojian Zheng (University of Toronto) -* Thomas Zhu (Oxford University) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md new file mode 100644 index 000000000..c88508712 --- /dev/null +++ b/CONTRIBUTORS.md @@ -0,0 +1,141 @@ +Collective Knowledge (CK), Collective Mind (CM) and Common Metadata eXchange (CMX) +were created by [Grigori Fursin](https://arxiv.org/abs/2406.16791), +sponsored by cKnowledge.org and cTuning.org, and donated to MLCommons +to benefit everyone. 
Since then, this open-source automation technology
+(CM, CMX, MLPerf automations, etc.) has been extended
+as a community effort thanks to all our volunteers, collaborators
+and contributors listed here in alphabetical order:
+
+# MLCommons Collective Mind (CM)
+
+* @Henryfzh
+* @Leonard226
+* @Oseltamivir
+* @Submandarine
+* Resmi Arjun
+* Omar Benjelloun (Google)
+* Alice Cheng (Nvidia)
+* Jiahao Chen (MIT)
+* Ramesh N Chukka (Intel)
+* Ray DeMoss (One Stop Systems)
+* Ryan T DeRue (Purdue University)
+* Himanshu Dutta (Indian Institute of Technology)
+* Nicolas Essayan
+* Justin Faust (One Stop Systems)
+* Diane Feddema (Red Hat)
+* Leonid Fursin (United Silicon Carbide)
+* Anirban Ghosh (Nvidia)
+* James Goel (Qualcomm)
+* Michael Goin (Neural Magic)
+* Jose Armando Hernandez (Paris Saclay University)
+* Mehrdad Hessar (OctoML)
+* Miro Hodak (AMD)
+* Sachin Idgunji (Nvidia)
+* Tom Jablin (Google)
+* Nino Jacob
+* David Kanter (MLCommons)
+* Alex Karargyris
+* Jason Knight (OctoML)
+* Ilya Kozulin (Deelvin)
+* @makaveli10 (Collabora)
+* Steve Leak (NERSC)
+* Amija Maji (Purdue University)
+* Peter Mattson (Google, MLCommons)
+* Kasper Mecklenburg (Arm)
+* Pablo Gonzalez Mesa
+* Thierry Moreau (OctoML)
+* Sachin Mudaliyar
+* Stanley Mwangi (Microsoft)
+* Ashwin Nanjappa (Nvidia)
+* Hai Ah Nam (NERSC)
+* Nandeeka Nayak (UIUC)
+* Datta Nimmaturi (Nutanix)
+* Lakshman Patel
+* Arun Tejusve Raghunath Rajan (Cruise)
+* Vijay Janapa Reddi (Harvard University)
+* Andrew Reusch (OctoML)
+* Anandhu Sooraj (Kerala Technical University)
+* Sergey Serebryakov (HPE)
+* Warren Schultz (Principled Technologies)
+* Amrutha Sheleenderan (Kerala Technical University)
+* Micah J Sheller (Intel)
+* Byoungjun Seo (TTA)
+* Aditya Kumar Shaw (Indian Institute of Science)
+* Ilya Slavutin (Deelvin)
+* Jinho Suh (Nvidia)
+* Arjun Suresh
+* Badhri Narayanan Suresh (Intel)
+* David Tafur (MLCommons)
+* Chloe Tessier
+* Gaurav Verma (Stony Brook University)
+* Zixian Wang
+* Nathan Wasson
+* Scott Wasson (MLCommons)
+* Haoyang Zhang (UIUC)
+* Bojian Zheng (University of Toronto)
+* Thomas Zhu (Oxford University)
+
+See more acknowledgments at the end of this [article](https://arxiv.org/abs/2406.16791),
+which describes the Collective Mind workflow automation framework. 
+ +# Legacy Collective Knowledge framework (CK) + +* Sam Ainsworth (University of Cambridge, UK) +* Saheli Bhattacharjee (@sahelib25) +* Gianfranco Costamagna +* Chris Cummins (Facebook) +* Valentin Dalibard <valentin.dalibard@cl.cam.ac.uk> +* Alastair Donaldson <alastair.donaldson@imperial.ac.uk> +* Thibaut Dumontet +* Daniil Efremov (Xored) +* Todd Gamblin (LLNL) +* Chandan Reddy Gopal (ENS Paris) +* Leo Gordon (dividiti) +* Dave Greasley (University of Bristol) +* Herve Guillou +* Vincent Grevendonk (Arm) +* Christophe Guillon (STMicroelectronics) +* Sven van Haastregt (Arm) +* Michael Haidl +* Stephen Herbein (LLNL) +* Patrick Hesse (College of Saint Benedict and Saint John's University) +* Nikolay Istomin (Xored) +* Kenan Kalajdzic +* Yuriy Kashnikov +* Alexey Kravets (Arm) +* Michael Kruse <MichaelKruse@meinersbur.de> +* Andrei Lascu <andrei.lascu10@imperial.ac.uk> +* Anton Lokhmotov (Krai) +* Graham Markall <graham.markall@continuum.io> +* Michael Mcgeagh (Arm) +* Abdul Wahid Memon <engrwahidmemon@gmail.com> +* Sachin Mudaliyar +* Luigi Nardi +* Cedric Nugteren <web@cedricnugteren.nl> +* Lucas Nussbaum (Universite de Lorraine) +* Ivan Ospiov (Xored) +* Lakshman Patel @Patel230 +* Egor Pasko (Google) +* Ed Plowman (Arm) +* Lahiru Rasnayake (NTNU) +* Vijay Janapa Reddi (Harvard University) +* Alex Redshaw (Arm) +* Vincent Rehm +* Toomas Remmelg (University of Edinburgh) +* Jarrett Revels (MIT) +* Dmitry Savenko (Xored) +* Gavin Simpson (Arm) +* Aaron Smith (Microsoft) +* Michel Steuwer (University of Edinburgh) +* Flavio Vella (Free University of Bozen-Bolzano) +* Gaurav Verma (Stony Brook University) +* Emanuele Vitali +* Dave Wilkinson (University of Pittsburgh) +* Sergey Yakushkin (Synopsys) +* Eiko Yoneki <eiko.yoneki@cl.cam.ac.uk> +* Thomas Zhu (Oxford University) <thomas.zhu.sh@gmail.com> +* @filven +* @ValouBambou + +See more acknowledgments at the end of this [article](https://doi.org/10.1098/rsta.2020.0211), +which describes the original Collective Knowledge workflow automation framework. diff --git a/README.md b/README.md index 158dcf656..acbea9e21 100755 --- a/README.md +++ b/README.md @@ -11,117 +11,92 @@ ## Collective Knowledge -[Collective Knowledge (CK, CM, CM4MLOps, CM4MLPerf and CMX)](https://cKnowledge.org) -is an educational community project to learn how to run AI, ML and other emerging workloads +[Collective Knowledge (CK)](https://cKnowledge.org) +is an educational project to learn how to run AI, ML and other emerging workloads in the most efficient and cost-effective way across diverse models, data sets, software and hardware: [ [white paper](https://arxiv.org/abs/2406.16791) ]. It includes the following sub-projects. -### Collective Minds (CM) +### Common Metadata eXchange (CMX) -The Collective Mind (CM) project, or Collective Minds, facilitates the -decomposition of complex software systems into portable, reusable, and -interconnected automation recipes. These recipes are developed and -continuously improved by the community. +The [CMX framework](https://github.com/mlcommons/ck/tree/master/cmx) +facilitates the decomposition of complex software systems and benchmarks such as MLPerf +into portable, reusable, and interconnected automation recipes for MLOps and DevOps. +These recipes are developed and continuously improved by the community. 
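+
+For example, a recipe can be invoked identically from the CLI and from Python.
+Below is a minimal sketch of the Python route via the `cmind` package (the
+`cmind.x` entry point is exported by `cmind/__init__.py`; the `detect,os`
+script tags are illustrative placeholders):
+
+```python
+import cmind
+
+# Python equivalent of the CLI call `cmx run script --tags=detect,os --quiet`.
+# CMX calls return a dictionary; a 'return' code above zero signals an error.
+r = cmind.x({'action': 'run', 'automation': 'script',
+             'tags': 'detect,os', 'quiet': True})
+if r['return'] > 0:
+    print(f"CMX failed: {r.get('error', '')}")
+```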
-#### CM automation framework
+***Starting in 2025, CMX V4.0.0 serves as a drop-in, backward-compatible replacement
+ for the earlier [Collective Mind framework (CM)](https://github.com/mlcommons/ck/tree/master/cm),
+ while providing a simpler and more robust interface.***

-The [Collective Mind framework](https://github.com/mlcommons/ck/tree/master/cm)
-is a lightweight, Python-based toolset featuring
-a unified command-line interface (CLI), Python API, and minimal dependencies.
-It is designed to assist researchers and engineers in automating repetitive, time-consuming
-tasks such as building, running, benchmarking, and optimizing AI, machine learning,
-and other applications across diverse and continuously changing models, data, software and hardware.
+CMX is a lightweight, Python-based toolset that provides a unified
+command-line interface (CLI), a Python API, and minimal dependencies.
+It is designed to help researchers and engineers automate repetitive,
+time-consuming tasks such as building, running, benchmarking, and
+optimizing AI, machine learning, and other applications across diverse and
+constantly evolving models, data, software, and hardware.

-Collective Mind is continuously enhanced through public and private Git repositories
-with CM automation recipes and artifacts accessible via unified CM interface.
+CMX is continuously enhanced through public and private Git repositories,
+providing automation recipes and artifacts that are seamlessly accessible
+via its unified interface.

-#### CMX automation framework
+### MLOps and MLPerf automations

-[CMX](https://github.com/mlcommons/ck/tree/master/cmx) is the next evolution
-of the Collective Mind framework designed to enhance simplicity, flexibility, and extensibility of automations
-based on user feedback. It is backwards compatible with CM, released along with CM
-in the [cmind package](https://pypi.org/project/cmind/) and can serve as drop-in replacement for CM.
-
-The CM/CMX architecture diagram is available for viewing
-[here](https://github.com/mlcommons/ck/tree/master/docs/specs/cm-diagram-v3.5.1.png).
-
-
-
-### Notable CM use cases
-
-#### MLOps and MLPerf automations
-
-[CM4MLOPS repository powered by CM](https://github.com/mlcommons/ck/tree/master/cm-mlops) -
-a collection of portable, extensible and technology-agnostic automation recipes
+We have developed a collection of portable, extensible and technology-agnostic automation recipes
with a common CLI and Python API (CM scripts) to unify and automate
all the manual steps required to compose, run, benchmark and optimize complex ML/AI applications
on diverse platforms with any software and hardware.

-The two key automations are *script" and *cache*:
+The two key automations are *script* and *cache*:
see [online catalog at CK playground](https://access.cknowledge.org/playground/?action=scripts),
[online MLCommons catalog](https://docs.mlcommons.org/cm4mlops/scripts).

CM scripts extend the concept of `cmake` with simple Python automations, native scripts
and JSON/YAML meta descriptions. They require Python 3.7+ with minimal dependencies and are
-[continuously extended by the community and MLCommons members](https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md)
+[continuously extended by the community and MLCommons members](https://github.com/mlcommons/ck/blob/master/CONTRIBUTORS.md)
to run natively on Ubuntu, MacOS, Windows, RHEL, Debian, Amazon Linux
and any other operating system, in a cloud or inside automatically generated containers
while keeping backward compatibility.

See the [online documentation](https://docs.mlcommons.org/inference)
-at MLCommons to run MLPerf inference benchmarks across diverse systems using CM.
-
-#### MLCommons ABTF automation
+at MLCommons to run MLPerf inference benchmarks across diverse systems using CMX.
+Just install CMX via `pip install cmind` and substitute the following commands with their `cmx` equivalents:
+* `cm` -> `cmx`
+* `mlc` -> `cmx run mlc`
+* `mlcr` -> `cmx run mlcr`

-[CM4ABTF repository powered by CM](https://github.com/mlcommons/cm4abtf) -
-a collection of portable automations and CM scripts to run the upcoming
-automotive MLPerf benchmark across different models, data sets, software
-and hardware from different vendors.
-
-#### MLPerf results visualization
+### MLPerf results visualization

[CM4MLPerf-results powered by CM](https://github.com/mlcommons/cm4mlperf-results) -
a simplified and unified representation of the past MLPerf results
in the CM format for further visualization and analysis using [CK graphs](https://access.cknowledge.org/playground/?action=experiments).

-#### Collective Knowledge Playground
+### Collective Knowledge Playground

[Collective Knowledge Playground](https://access.cKnowledge.org) -
a unified and open-source platform designed to
[index all CM scripts](https://access.cknowledge.org/playground/?action=scripts) similar to PYPI,
assist users in preparing CM commands to:

+* aggregate, process, visualize, and compare [MLPerf benchmarking results](https://access.cknowledge.org/playground/?action=experiments) for AI and ML systems
* [run MLPerf benchmarks](https://access.cknowledge.org/playground/?action=howtorun)
-* aggregate, process, visualize, and compare [benchmarking results](https://access.cknowledge.org/playground/?action=experiments) for AI and ML systems
-* organize [open, reproducible optimization challenges and tournaments](https://access.cknowledge.org/playground/?action=challenges).
+* organize [open and reproducible optimization challenges and tournaments](https://access.cknowledge.org/playground/?action=challenges).

These initiatives aim to help academia and industry collaboratively enhance the efficiency
and cost-effectiveness of AI systems.

-#### Artifact Evaluation
+### Artifact Evaluation and Reproducibility Initiatives

[Artifact Evaluation automation](https://cTuning.org/ae) -
a community-driven initiative leveraging the Collective Mind framework
to automate artifact evaluation and support reproducibility efforts
at ML and systems conferences.

-* [CM4Research repository powered by CM](https://github.com/ctuning/cm4research) -
-a unified interface designed to streamline the preparation, execution, and reproduction of experiments in research projects.
-
## Legacy projects

-### CM-MLOps (now CM4MLOps)
-
-You can find CM-MLOps original dev directory [here](https://github.com/mlcommons/ck/tree/master/cm-mlops).
-We moved it to [CM4MLOps](https://github.com/mlcommons/ck/tree/master/cm4mlops) in 2024. 
-In 2025, we aggregate all CM and CMX automations in the [new CMX4MLOps repository](https://github.com/mlcommons/ck/tree/master/cmx4mlops). - -### CK automation framework v1 and v2 - -You can find the original CK automation framework v1 and v2 directory [here](https://github.com/mlcommons/ck/tree/master/ck). -It was deprecated for the [CM framework](https://github.com/mlcommons/ck/tree/master/cm) -and later for the [CMX workflow automation framework (backwards compatible with CM)](https://github.com/mlcommons/ck/tree/master/cmx) +* [CM](https://github.com/mlcommons/ck/tree/master/cm) +* [CM-MLOps](https://github.com/mlcommons/ck/tree/master/cm-mlops) +* [CM4MLOps](https://github.com/mlcommons/cm4mlops) +* [CK automation framework v1 and v2](https://github.com/mlcommons/ck/tree/master/ck) ## License @@ -130,7 +105,7 @@ and later for the [CMX workflow automation framework (backwards compatible with ## Copyright -Copyright (c) 2021-2024 MLCommons +Copyright (c) 2021-2025 MLCommons Grigori Fursin, the cTuning foundation and OctoML donated this project to MLCommons to benefit everyone. @@ -142,12 +117,12 @@ Copyright (c) 2014-2021 cTuning foundation ## Maintainers -* CM, CM4MLOps and MLPerf automations: MLCommons -* CMX (the next generation of CM): Grigori Fursin +* CM, CM4MLOps and MLPerf automations: [MLCommons infra WG](https://mlcommons.org) +* CMX (the next generation of CM since 2025): [Grigori Fursin](https://cKnowledge.org/gfursin) ## Long-term vision -To learn more about the motivation behind CK and CM technology, please explore the following presentations: +To learn more about the motivation behind this project, please explore the following presentations: * "Enabling more efficient and cost-effective AI/ML systems with Collective Mind, virtualized MLOps, MLPerf, Collective Knowledge Playground and reproducible optimization tournaments": [ [ArXiv](https://arxiv.org/abs/2406.16791) ] * ACM REP'23 keynote about the MLCommons CM automation framework: [ [slides](https://doi.org/10.5281/zenodo.8105339) ] @@ -157,28 +132,22 @@ To learn more about the motivation behind CK and CM technology, please explore t ## Documentation * [White paper](https://arxiv.org/abs/2406.16791) -* [CM/CMX architecture](https://github.com/mlcommons/ck/tree/master/docs/specs/cm-diagram-v3.5.1.png) -* [CM/CMX installation GUI](https://access.cknowledge.org/playground/?action=install) -* [CM Getting Started Guide and FAQ](https://github.com/mlcommons/ck/tree/master/docs/getting-started.md) - * [Common CM interface to run MLPerf inference benchmarks](https://github.com/mlcommons/ck/tree/master/docs/mlperf/inference) - * [Common CM interface to re-run experiments from ML and Systems papers including MICRO'23 and the Student Cluster Competition @ SuperComputing'23](https://github.com/mlcommons/ck/tree/master/docs/tutorials/common-interface-to-reproduce-research-projects.md) - * [CM4MLOps automation recipes for MLOps and DevOps](https://access.cknowledge.org/playground/?action=scripts) - * [Other CM tutorials](https://github.com/mlcommons/ck/tree/master/docs/tutorials) -* [Full documentation](https://github.com/mlcommons/ck/tree/master/docs/README.md) -* [CM taskforce](https://github.com/mlcommons/ck/tree/master/docs/taskforce.md) -* History: [CK](https://github.com/mlcommons/ck/tree/master/docs/history.md), [CM and CM automations for MLOps and MLPerf](https://github.com/mlcommons/ck/blob/master/HISTORY.CM.md) +* [CMX architecture](https://github.com/mlcommons/ck/tree/master/docs/specs/cm-diagram-v3.5.1.png) +* [CMX 
installation GUI](https://access.cknowledge.org/playground/?action=install) + +*TBD* ### Acknowledgments This open-source project was created by [Grigori Fursin](https://cKnowledge.org/gfursin) and sponsored by cTuning.org, OctoAI and HiPEAC. -Grigori donated CK to MLCommons to benefit the community -and to advance its development as a collaborative, community-driven effort. +Grigori donated this project to MLCommons to modularize and automate MLPerf benchmarks, +benefit the community, and foster its development as a collaborative, community-driven effort. We thank [MLCommons](https://mlcommons.org), [FlexAI](https://flex.ai) and [cTuning](https://cTuning.org) for supporting this project, -as well as our dedicated [volunteers and collaborators](https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md) +as well as our dedicated [volunteers and collaborators](https://github.com/mlcommons/ck/blob/master/CONTRIBUTORS.md) for their feedback and contributions! If you found the CM automations helpful, kindly reference this article: diff --git a/cm/CHANGES.md b/cm/CHANGES.md index a7a6ec8c9..1a1846c4d 100644 --- a/cm/CHANGES.md +++ b/cm/CHANGES.md @@ -1,3 +1,6 @@ +## CMX V4.0.0 + - added CMX interface for new tools + ## V3.5.3 - fixed `cm info repo` and `cmx info repo`: https://github.com/mlcommons/ck/issues/1378 diff --git a/cm/README.md b/cm/README.md index 5cfb4b8ac..06e2db808 100644 --- a/cm/README.md +++ b/cm/README.md @@ -7,205 +7,139 @@ [![CM test](https://github.com/mlcommons/ck/actions/workflows/test-cm.yml/badge.svg)](https://github.com/mlcommons/ck/actions/workflows/test-cm.yml) [![CM script automation features test](https://github.com/mlcommons/ck/actions/workflows/test-cm-script-features.yml/badge.svg)](https://github.com/mlcommons/ck/actions/workflows/test-cm-script-features.yml) -## Collective Mind (CM) -Collective Mind (CM) is a very lightweight [Python-based framework](https://github.com/mlcommons/ck/tree/master/cm) -featuring a unified CLI, Python API, and minimal dependencies. It is available through [PYPI](https://pypi.org/project/cmind). +### Common Metadata eXchange (CMX) -CM is designed for creating and managing portable and technology-agnostic automations for MLOps, DevOps and ResearchOps. -It aims to assist researchers and engineers in automating their repetitive, tedious and time-consuming tasks -to build, run, benchmark and optimize various applications -across diverse and continuously changing models, data, software and hardware. +The [CMX framework](https://github.com/mlcommons/ck/tree/master/cmx) +facilitates the decomposition of complex software systems and benchmarks such as MLPerf +into portable, reusable, and interconnected automation recipes for MLOps and DevOps. +These recipes are developed and continuously improved by the community. -Collective Mind is a part of [Collective Knowledge (CK)](https://github.com/mlcommons/ck) - -an educational community project to learn how to run AI, ML and other emerging workloads -in the most efficient and cost-effective way across diverse -and ever-evolving systems using the MLPerf benchmarking methodology. - -#### CMX automation framework - -[CMX](https://github.com/mlcommons/ck/tree/master/cmx) - the next evolution -of the [Collective Mind framework](https://github.com/mlcommons/ck/tree/master/cm) -designed to enhance simplicity, flexibility, and extensibility of automations -based on user feedback. 
It is backwards compatible with CM, released along with CM
-in the [cmind package](https://pypi.org/project/cmind/) and can serve as drop-in replacement for CM.
+***Starting in 2025, CMX V4.0.0 serves as a drop-in, backward-compatible replacement
+ for the earlier [Collective Mind framework (CM)](https://github.com/mlcommons/ck/tree/master/cm),
+ while providing a simpler and more robust interface.***

-The CM/CMX architecture diagram is available for viewing
-[here](https://github.com/mlcommons/ck/tree/master/docs/specs/cm-diagram-v3.5.1.png).
+CMX is a lightweight, Python-based toolset that provides a unified
+command-line interface (CLI), a Python API, and minimal dependencies.
+It is designed to help researchers and engineers automate repetitive,
+time-consuming tasks such as building, running, benchmarking, and
+optimizing AI, machine learning, and other applications across diverse and
+constantly evolving models, data, software, and hardware.

+CMX is continuously enhanced through public and private Git repositories,
+providing automation recipes and artifacts that are seamlessly accessible
+via its unified interface.

-## CM and CMX architecture
-
-The diagram below illustrates the primary classes, functions, and internal automations within the Collective Mind framework:
-
-![](https://cKnowledge.org/images/cm-diagram-v3.5.1.png)
-
-The CM API documentation is available [here](https://cknowledge.org/docs/cm/api/cmind.html).
-
-## Projects powered by Collective Mind
+CMX is a part of [Collective Knowledge (CK)](https://github.com/mlcommons/ck) -
+an educational community project to learn how to run AI, ML and other emerging workloads
+in the most efficient and cost-effective way across diverse
+and ever-evolving systems using the MLPerf benchmarking methodology.

-Collective Mind is continuously enhanced through public and private Git repositories,
-which serve as the unified interface for various collections of reusable automations and artifacts.
+### Legacy Collective Mind automation framework (CM)

-The most notable projects and repositories powered by CM are:
+* [GitHub](https://github.com/mlcommons/ck/tree/master/cm)

-#### Automations for MLOps and MLPerf
+### MLOps and MLPerf automations

-[CM4MLOPS repository powered by CM](https://github.com/mlcommons/cm4mlops) -
-a collection of portable, extensible and technology-agnostic automation recipes
+We have developed a collection of portable, extensible and technology-agnostic automation recipes
with a common CLI and Python API (CM scripts) to unify and automate
all the manual steps required to compose, run, benchmark and optimize complex ML/AI applications
on diverse platforms with any software and hardware.

-The two key automations are *script" and *cache*:
+The two key automations are *script* and *cache*:
see [online catalog at CK playground](https://access.cknowledge.org/playground/?action=scripts),
[online MLCommons catalog](https://docs.mlcommons.org/cm4mlops/scripts).

CM scripts extend the concept of `cmake` with simple Python automations, native scripts
and JSON/YAML meta descriptions. They require Python 3.7+ with minimal dependencies and are
-[continuously extended by the community and MLCommons members](https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md)
+[continuously extended by the community and MLCommons members](https://github.com/mlcommons/ck/blob/master/CONTRIBUTORS.md)
to run natively on Ubuntu, MacOS, Windows, RHEL, Debian, Amazon Linux
and any other operating system, in a cloud or inside automatically generated containers
while keeping backward compatibility.

-CM scripts were originally developed based on the following requirements from the
-[MLCommons members](https://mlcommons.org)
-to help them automatically compose and optimize complex MLPerf benchmarks, applications and systems
-across diverse and continuously changing models, data sets, software and hardware
-from Nvidia, Intel, AMD, Google, Qualcomm, Amazon and other vendors:
-* must work out of the box with the default options and without the need to edit some paths, environment variables and configuration files;
-* must be non-intrusive, easy to debug and must reuse existing
-  user scripts and automation tools (such as cmake, make, ML workflows,
-  python poetry and containers) rather than substituting them;
-* must have a very simple and human-friendly command line with a Python API and minimal dependencies;
-* must require minimal or zero learning curve by using plain Python, native scripts, environment variables
-  and simple JSON/YAML descriptions instead of inventing new workflow languages;
-* must have the same interface to run all automations natively, in a cloud or inside containers.
-
See the [online documentation](https://docs.mlcommons.org/inference)
-at MLCommons to run MLPerf inference benchmarks across diverse systems using CM.
-
-#### CM4ABTF
-
-[CM4ABTF repository powered by CM](https://github.com/mlcommons/cm4abtf) -
-a collection of portable automations and CM scripts to run the upcoming
-automotive MLPerf benchmark across different models, data sets, software
-and hardware from different vendors.
+at MLCommons to run MLPerf inference benchmarks across diverse systems using CMX.
+Just install CMX via `pip install cmind` and substitute the following commands with their `cmx` equivalents
+(an illustrative substitution example is provided at the end of this README):
+* `cm` -> `cmx`
+* `mlc` -> `cmx run mlc`
+* `mlcr` -> `cmx run mlcr`

-#### CM4MLPerf-results
+### MLPerf results visualization

[CM4MLPerf-results powered by CM](https://github.com/mlcommons/cm4mlperf-results) -
a simplified and unified representation of the past MLPerf results
in the CM format for further visualization and analysis using [CK graphs](https://access.cknowledge.org/playground/?action=experiments).

-#### CM4Research
-
-[CM4Research repository powered by CM](https://github.com/ctuning/cm4research) -
-a unified interface designed to streamline the preparation, execution, and reproduction of experiments in research projects. 
- - -### Projects powered by Collective Mind - -#### Collective Knowledge Playground +### Collective Knowledge Playground [Collective Knowledge Playground](https://access.cKnowledge.org) - a unified and open-source platform designed to [index all CM scripts](https://access.cknowledge.org/playground/?action=scripts) similar to PYPI, assist users in preparing CM commands to: +* aggregate, process, visualize, and compare [MLPerf benchmarking results](https://access.cknowledge.org/playground/?action=experiments) for AI and ML systems * [run MLPerf benchmarks](https://access.cknowledge.org/playground/?action=howtorun) -* aggregate, process, visualize, and compare [benchmarking results](https://access.cknowledge.org/playground/?action=experiments) for AI and ML systems -* organize [open, reproducible optimization challenges and tournaments](https://access.cknowledge.org/playground/?action=challenges). +* organize [open and reproducible optimization challenges and tournaments](https://access.cknowledge.org/playground/?action=challenges). These initiatives aim to help academia and industry collaboratively enhance the efficiency and cost-effectiveness of AI systems. -#### Artifact Evaluation +### Artifact Evaluation and Reproducibility Initiatives [Artifact Evaluation automation](https://cTuning.org/ae) - a community-driven initiative leveraging the Collective Mind framework to automate artifact evaluation and support reproducibility efforts at ML and systems conferences. -CM scripts extend the concept of `cmake` with simple Python automations, native scripts -and JSON/YAML meta descriptions. They require Python 3.7+ with minimal dependencies and are -[continuously extended by the community and MLCommons members](https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md) -to run natively on Ubuntu, MacOS, Windows, RHEL, Debian, Amazon Linux -and any other operating system, in a cloud or inside automatically generated containers -while keeping backward compatibility. -CM scripts were originally developed based on the following requirements from the -[MLCommons members](https://mlcommons.org) -to help them automatically compose and optimize complex MLPerf benchmarks, applications and systems -across diverse and continuously changing models, data sets, software and hardware -from Nvidia, Intel, AMD, Google, Qualcomm, Amazon and other vendors: -* must work out of the box with the default options and without the need to edit some paths, environment variables and configuration files; -* must be non-intrusive, easy to debug and must reuse existing - user scripts and automation tools (such as cmake, make, ML workflows, - python poetry and containers) rather than substituting them; -* must have a very simple and human-friendly command line with a Python API and minimal dependencies; -* must require minimal or zero learning curve by using plain Python, native scripts, environment variables - and simple JSON/YAML descriptions instead of inventing new workflow languages; -* must have the same interface to run all automations natively, in a cloud or inside containers. 
- -### Author and maintainer +## License -* [Grigori Fursin](https://cKnowledge.org/gfursin) (FlexAI, cTuning) +[Apache 2.0](LICENSE.md) -### Repositories powered by CM +## Copyright -* [CM4MLOPS / CM4MLPerf](https://github.com/mlcommons/cm4mlops) - - a collection of portable, extensible and technology-agnostic automation recipes - with a common CLI and Python API (CM scripts) to unify and automate - all the manual steps required to compose, run, benchmark and optimize complex ML/AI applications - on diverse platforms with any software and hardware: see [online catalog at CK playground](https://access.cknowledge.org/playground/?action=scripts), - [online MLCommons catalog](https://docs.mlcommons.org/cm4mlops/scripts) +Copyright (c) 2021-2025 MLCommons -* [CM interface to run MLPerf inference benchmarks](https://docs.mlcommons.org/inference) +Grigori Fursin, the cTuning foundation and OctoML donated this project to MLCommons to benefit everyone. -* [CM4ABTF](https://github.com/mlcommons/cm4abtf) - a unified CM interface and automation recipes - to run automotive benchmark across different models, data sets, software and hardware from different vendors. +Copyright (c) 2014-2021 cTuning foundation -* [CM4Research](https://github.com/ctuning/cm4research) - a unified CM interface an automation recipes - to make it easier to reproduce results from published research papers. +## Author +* [Grigori Fursin](https://cKnowledge.org/gfursin) (FlexAI, cTuning) -### Resources +## Maintainers -* CM v2.x (2022-cur) (stable): [installation on Linux, Windows, MacOS](https://access.cknowledge.org/playground/?action=install) ; - [docs](https://docs.mlcommons.org/ck) ; [popular commands](https://github.com/mlcommons/ck/tree/master/cm/docs/demos/some-cm-commands.md) ; - [getting started guide](https://github.com/mlcommons/ck/blob/master/docs/getting-started.md) -* CM v3.x aka CMX (2024-cur) (stable): [docs](https://github.com/orgs/mlcommons/projects/46) -* MLPerf inference benchmark automated via CM - * [Run MLPerf for submissions](https://docs.mlcommons.org/inference) - * [Run MLPerf at the Student Cluster Competition'24](https://docs.mlcommons.org/inference/benchmarks/text_to_image/reproducibility/scc24) -* Examples of modular containers and GitHub actions with CM commands: - * [GitHub action with CM commands to test MLPerf inference benchmark](https://github.com/mlcommons/inference/blob/master/.github/workflows/test-bert.yml) - * [Dockerfile to run MLPerf inference benchmark via CM](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/app-mlperf-inference/dockerfiles/bert-99.9/ubuntu_22.04_python_onnxruntime_cpu.Dockerfile) +* CM, CM4MLOps and MLPerf automations: [MLCommons infra WG](https://mlcommons.org) +* CMX (the next generation of CM since 2025): [Grigori Fursin](https://cKnowledge.org/gfursin) -### License +## Long-term vision -[Apache 2.0](LICENSE.md) +To learn more about the motivation behind this project, please explore the following presentations: -### Citing Collective Mind project +* "Enabling more efficient and cost-effective AI/ML systems with Collective Mind, virtualized MLOps, MLPerf, Collective Knowledge Playground and reproducible optimization tournaments": [ [ArXiv](https://arxiv.org/abs/2406.16791) ] +* ACM REP'23 keynote about the MLCommons CM automation framework: [ [slides](https://doi.org/10.5281/zenodo.8105339) ] +* ACM TechTalk'21 about Collective Knowledge project: [ [YouTube](https://www.youtube.com/watch?v=7zpeIVwICa4) ] [ 
[slides](https://learning.acm.org/binaries/content/assets/leaning-center/webinar-slides/2021/grigorifursin_techtalk_slides.pdf) ] +* Journal of Royal Society'20: [ [paper](https://royalsocietypublishing.org/doi/10.1098/rsta.2020.0211) ] -If you found CM automations help, please cite this article: -[ [ArXiv](https://arxiv.org/abs/2406.16791) ], [ [BibTex](https://github.com/mlcommons/ck/blob/master/citation.bib) ]. +## Documentation -History: [CM and CM automations for MLOps and MLPerf](https://github.com/mlcommons/ck/blob/master/HISTORY.CM.md). +* [White paper](https://arxiv.org/abs/2406.16791) +* [CMX architecture](https://github.com/mlcommons/ck/tree/master/docs/specs/cm-diagram-v3.5.1.png) +* [CMX installation GUI](https://access.cknowledge.org/playground/?action=install) -You can learn more about the motivation behind these projects from the following presentations: +*TBD* -* "Enabling more efficient and cost-effective AI/ML systems with Collective Mind, virtualized MLOps, MLPerf, Collective Knowledge Playground and reproducible optimization tournaments": [ [ArXiv](https://arxiv.org/abs/2406.16791) ] -* ACM REP'23 keynote about the MLCommons CM automation framework: [ [slides](https://doi.org/10.5281/zenodo.8105339) ] -* ACM TechTalk'21 about Collective Knowledge project: [ [YouTube](https://www.youtube.com/watch?v=7zpeIVwICa4) ] [ [slides](https://learning.acm.org/binaries/content/assets/leaning-center/webinar-slides/2021/grigorifursin_techtalk_slides.pdf) ] ### Acknowledgments -Collective Mind (CM) was originally developed by [Grigori Fursin](https://cKnowledge.org/gfursin), -as a part of the [Collective Knowledge educational initiative](https://cKnowledge.org), -sponsored by [cTuning.org](https://cTuning.org) and [cKnowledge.org](https://cKnowledge.org), -and contributed to MLCommons for the benefit of all. +This open-source project was created by [Grigori Fursin](https://cKnowledge.org/gfursin) +and sponsored by cTuning.org, OctoAI and HiPEAC. +Grigori donated this project to MLCommons to modularize and automate MLPerf benchmarks, +benefit the community, and foster its development as a collaborative, community-driven effort. -This open-source technology, including CM4MLOps and MLPerf automations, CM4ABTF, CM4Research, and more, -is a collaborative project supported by [MLCommons](https://mlcommons.org), -[FlexAI](https://flex.ai), [cTuning](https://cTuning.org) -and our [amazing volunteers, collaborators, and contributors](https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md)! +We thank [MLCommons](https://mlcommons.org), [FlexAI](https://flex.ai) +and [cTuning](https://cTuning.org) for supporting this project, +as well as our dedicated [volunteers and collaborators](https://github.com/mlcommons/ck/blob/master/CONTRIBUTORS.md) +for their feedback and contributions! + +If you found the CM automations helpful, kindly reference this article: +[ [ArXiv](https://arxiv.org/abs/2406.16791) ], [ [BibTex](https://github.com/mlcommons/ck/blob/master/citation.bib) ]. 
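+
+As an illustrative example of the CM-to-CMX command substitution described above
+(a sketch; the `detect,os` script tags are placeholders, and any CM script tags
+work the same way):
+
+```bash
+# Former CM invocation:
+cm run script --tags=detect,os --quiet
+
+# CMX drop-in equivalent:
+cmx run script --tags=detect,os --quiet
+
+# mlcflow commands can be routed through CMX in the same way:
+cmx run mlcr detect,os --quiet
+```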
diff --git a/cm/cmind/__init__.py b/cm/cmind/__init__.py index 3c2df1265..ad29c0cb4 100644 --- a/cm/cmind/__init__.py +++ b/cm/cmind/__init__.py @@ -1,4 +1,4 @@ -# Collective Mind init +# Common Metadata eXchange and Collective Mind init # # Author(s): Grigori Fursin # Contributor(s): @@ -9,7 +9,7 @@ # White paper: https://arxiv.org/abs/2406.16791 # Project contributors: https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md -__version__ = "3.5.3.1" +__version__ = "4.0.0" from cmind.core import access from cmind.core import x diff --git a/cm/cmind/repo/automation/mlc/_cm.json b/cm/cmind/repo/automation/mlc/_cm.json new file mode 100644 index 000000000..9c6e0dada --- /dev/null +++ b/cm/cmind/repo/automation/mlc/_cm.json @@ -0,0 +1,12 @@ +{ + "alias": "mlc", + "automation_alias": "automation", + "automation_uid": "bbeb15d8f0a944a4", + "desc": "CMX interface for mlcflow", + "sort": -1000, + "tags": [ + "automation", + "mlc" + ], + "uid": "26193c384fae476c" +} diff --git a/cm/cmind/repo/automation/mlc/module.py b/cm/cmind/repo/automation/mlc/module.py new file mode 100644 index 000000000..a1544d19d --- /dev/null +++ b/cm/cmind/repo/automation/mlc/module.py @@ -0,0 +1,35 @@ +# CMX interface for mlcflow + +import os + +from cmind.automation import Automation +from cmind import utils +from cmind import cli + +class CAutomation(Automation): + """ + CMX interface for mlcflow + """ + + ############################################################ + def __init__(self, cmind, automation_file): + super().__init__(cmind, __file__) + + ############################################################ + def run(self, i): + """ + CMX interface for mlcflow + + """ + + _cmd = i['control']['_cmd'][2:] + + cmd = 'mlc ' + ' '.join(_cmd) + + returncode = os.system(cmd) + + r = {'return': returncode} + if returncode > 0: + r['error'] = 'mlc command failed' + + return r diff --git a/cm/cmind/repo/automation/mlcr/_cm.json b/cm/cmind/repo/automation/mlcr/_cm.json new file mode 100644 index 000000000..ed87d7d27 --- /dev/null +++ b/cm/cmind/repo/automation/mlcr/_cm.json @@ -0,0 +1,12 @@ +{ + "alias": "mlcr", + "automation_alias": "automation", + "automation_uid": "bbeb15d8f0a944a4", + "desc": "CMX interface for mlcr", + "sort": -1000, + "tags": [ + "automation", + "mlcr" + ], + "uid": "f7d6f6786efd4e3c" +} diff --git a/cm/cmind/repo/automation/mlcr/module.py b/cm/cmind/repo/automation/mlcr/module.py new file mode 100644 index 000000000..8da93982d --- /dev/null +++ b/cm/cmind/repo/automation/mlcr/module.py @@ -0,0 +1,35 @@ +# CMX interface for mlcr + +import os + +from cmind.automation import Automation +from cmind import utils +from cmind import cli + +class CAutomation(Automation): + """ + CMX interface for mlcr + """ + + ############################################################ + def __init__(self, cmind, automation_file): + super().__init__(cmind, __file__) + + ############################################################ + def run(self, i): + """ + CMX interface for mlcr + + """ + + _cmd = i['control']['_cmd'][2:] + + cmd = 'mlcr ' + ' '.join(_cmd) + + returncode = os.system(cmd) + + r = {'return': returncode} + if returncode > 0: + r['error'] = 'mlcr command failed' + + return r diff --git a/cm/pyproject.toml b/cm/pyproject.toml index f0786bfdb..d2e1057d2 100644 --- a/cm/pyproject.toml +++ b/cm/pyproject.toml @@ -13,7 +13,7 @@ maintainers = [ {name = "Grigori Fursin", email = "grigori.fursin@ctuning.org"} ] -description = "Collective Mind automation framework (CM)" +description = "Common Metadata eXchange 
framework (CMX) and Collective Mind automation framework (CM)"

requires-python = ">=3.7"
@@ -30,6 +30,9 @@ keywords = [
  "cmind",
  "cm",
  "cmx",
+  "cmx-mlcflow",
+  "cmx-mlcr",
+  "common metadata exchange",
  "collective mind",
  "automation",
  "portability",
@@ -67,6 +70,8 @@ include-package-data = true
cmind = ["repo/*",
         "repo/automation/automation/*",
         "repo/automation/ckx/*",
+         "repo/automation/mlc/*",
+         "repo/automation/mlcr/*",
         "repo/automation/core/*",
         "repo/automation/core/cm_60cb625a46b38610/*",
         "repo/automation/repo/*"
diff --git a/cmx/README.md b/cmx/README.md
index f74c85a06..c6c319c81 100644
--- a/cmx/README.md
+++ b/cmx/README.md
@@ -1,9 +1,24 @@
-# Collective Mind v3 aka CMX
-
-CMX represents the next generation of the CM automation framework.
-Developed by Grigori Fursin, it builds upon valuable feedback
-gathered from CM users.
-
+# Common Metadata eXchange (CMX)
+
+The [CMX framework](https://github.com/mlcommons/ck/tree/master/cmx)
+facilitates the decomposition of complex software systems and benchmarks such as MLPerf
+into portable, reusable, and interconnected automation recipes for MLOps and DevOps.
+These recipes are developed and continuously improved by the community.
+
+***Starting in 2025, CMX V4.0.0 serves as a drop-in, backward-compatible replacement
+ for the earlier [Collective Mind framework (CM)](https://github.com/mlcommons/ck/tree/master/cm),
+ while providing a simpler and more robust interface.***
+
+CMX is a lightweight, Python-based toolset that provides a unified
+command-line interface (CLI), a Python API, and minimal dependencies.
+It is designed to help researchers and engineers automate repetitive,
+time-consuming tasks such as building, running, benchmarking, and
+optimizing AI, machine learning, and other applications across diverse and
+constantly evolving models, data, software, and hardware.
+
+CMX is continuously enhanced through public and private Git repositories,
+providing automation recipes and artifacts that are seamlessly accessible
+via its unified interface.

## Documentation
diff --git a/cmx/install.md b/cmx/install.md
index 35db645f0..b4abb945f 100644
--- a/cmx/install.md
+++ b/cmx/install.md
@@ -1,15 +1,14 @@
[ [Back to index](README.md) ]

-# CM Installation
+# Common Metadata eXchange installation

-CM framework requires minimal dependencies to run on any platform: `python 3.7+, pip, venv, git, git-lfs, wget, curl`.
+The CMX framework requires minimal dependencies to run on any platform: `python 3.7+, pip, venv, git, git-lfs, wget, curl`.

-By default, CM will pull Git repositories and cache installations and downloaded files in your `$HOME/CM` directory on Linux and MacOS
+By default, CMX will pull Git repositories and cache installations and downloaded files in your `$HOME/CM` directory on Linux and MacOS
or `%userprofile%\CM` directory on Windows.
You can change it to any other directory using the `CM_REPOS` environment variable,
for example `export CM_REPOS=/scratch/CM`.

-*Feel free to use the [online installation GUI](https://access.cknowledge.org/playground/?action=install-cmx)*.
-
+*Feel free to use the [online installation GUI](https://access.cknowledge.org/playground/?action=install)*. 
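+
+For example, to keep all CMX repositories and cached downloads on a scratch
+disk (a sketch; the path is illustrative), set the variable before pulling
+any repository:
+
+```bash
+export CM_REPOS=/scratch/CM    # use a scratch disk instead of the default $HOME/CM
+python -m pip install cmind
+cmx pull repo mlcommons@ck     # the repository is now cloned and cached under CM_REPOS
+```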
## Ubuntu, Debian @@ -114,13 +113,16 @@ python -m pip install cmind -# CM CLI testing +# CMX CLI If the installation is successful, you can run the CM CLI as follows: ```bash gfursin@cmind:~$ cmx -cmx {action} {automation} {artifact(s)} {flags} @input.yaml @input.json +cmx {action} {automation} {artifact(s)} {CMX control flags (-)} {CMX automation flags (--)} ``` +```bash +gfursin@cmind:~$ cmx test core +``` diff --git a/cmx4mlops/README.md b/cmx4mlops/README.md index a0990367e..674e1da1f 100644 --- a/cmx4mlops/README.md +++ b/cmx4mlops/README.md @@ -1 +1,67 @@ -TBD +# Aggregated CM and CMX automations for MLOps and MLPerf + +[![License](https://img.shields.io/badge/License-Apache%202.0-green)](LICENSE.md) +[![Powered by CM/CMX](https://img.shields.io/badge/Powered_by-MLCommons%20CM-blue)](https://pypi.org/project/cmind). + +This repository is powered by the [Collective Mind workflow automation framework](https://github.com/mlcommons/ck/tree/master/cm). + +Two key automations developed using CM are **Script** and **Cache**, which streamline machine learning (ML) workflows, +including managing Docker runs. Both Script and Cache automations are part of the **cmx4mlops** repository. + +The [CM scripts](https://access.cknowledge.org/playground/?action=scripts), +also housed in this repository, consist of hundreds of modular Python-wrapped scripts accompanied +by `yaml` metadata, enabling the creation of robust and flexible ML workflows. + +## License + +[Apache 2.0](LICENSE.md) + +## Copyright + +© 2022-2025 MLCommons. All Rights Reserved. + +Grigori Fursin, the cTuning foundation and OctoML donated the CK and CM projects to MLCommons to benefit everyone and encourage collaborative development. + +## Maintainers + +* CM, CM4MLOps and MLPerf automations: MLCommons +* CMX (the next generation of CM): Grigori Fursin + +## Author + +[Grigori Fursin](https://cKnowledge.org/gfursin) + +We sincerely appreciate all [contributors](https://github.com/mlcommons/ck/blob/master/CONTRIBUTORS.md) +for their invaluable feedback and support! + +## Concepts + +Check our [ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) and the [white paper](https://arxiv.org/abs/2406.16791). 
+
+## Test image classification and MLPerf R-GAT inference benchmark via CMX PYPI package
+
+```bash
+pip install cmind
+pip install cmx4mlops
+cmx run script "python app image-classification onnx" --quiet
+cmx run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --adr.inference-src.tags=_branch.dev --pull_changes=yes --pull_inference_changes=yes --hw_name=ubuntu-latest_x86 --model=rgat --implementation=python --backend=pytorch --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --category=datacenter --quiet --v --target_qps=1
+```
+
+## Test image classification and MLPerf R-GAT inference benchmark via CMX GitHub repo
+
+```bash
+pip uninstall cmx4mlops
+pip install cmind
+cmx pull repo mlcommons@ck --dir=cmx4mlops/cmx4mlops
+cmx run script "python app image-classification onnx" --quiet
+cmx run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --adr.inference-src.tags=_branch.dev --pull_changes=yes --pull_inference_changes=yes --hw_name=ubuntu-latest_x86 --model=rgat --implementation=python --backend=pytorch --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --category=datacenter --quiet --v --target_qps=1
+```
+
+## Parent project
+
+Visit the [parent Collective Knowledge project](https://github.com/mlcommons/ck) for further details.
+
+## Citing this project
+
+If you found the CM automations helpful, kindly reference this article:
+[ [ArXiv](https://arxiv.org/abs/2406.16791) ]
diff --git a/cmx4mlops/cmx4mlops/CONTRIBUTING.md b/cmx4mlops/cmx4mlops/CONTRIBUTING.md
new file mode 100644
index 000000000..0975ca9ef
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/CONTRIBUTING.md
@@ -0,0 +1,16 @@
+### Contributing to the MLCommons
+
+The best way to contribute to the MLCommons is to get involved with one of our many project communities.
+You can find more information about getting involved with MLCommons [here](https://mlcommons.org/en/get-involved/#getting-started).
+
+Generally, we encourage people to become an MLCommons member if they wish to contribute to MLCommons projects,
+but outside pull requests are very welcome too.
+
+Regardless of whether you are a member, your organization needs to sign the MLCommons CLA.
+Please fill out this [CLA sign-up form](https://forms.gle/Ew1KkBVpyeJDuRw67) to get started.
+
+MLCommons project work is tracked with issue trackers and pull requests.
+Modify the project in your own fork and issue a pull request once you want other developers
+to take a look at what you have done and discuss the proposed changes.
+Ensure that cla-bot and other checks pass for your pull requests.
+
diff --git a/cmx4mlops/cmx4mlops/CONTRIBUTORS.md b/cmx4mlops/cmx4mlops/CONTRIBUTORS.md
new file mode 100644
index 000000000..cd3b7effd
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/CONTRIBUTORS.md
@@ -0,0 +1,74 @@
+Collective Knowledge (CK), Collective Mind (CM) and Common Metadata eXchange (CMX)
+were created by [Grigori Fursin](https://arxiv.org/abs/2406.16791),
+sponsored by cKnowledge.org and cTuning.org, and donated to MLCommons
+to benefit everyone. 
Since then, this open-source automation technology
+(CM, CMX, MLPerf automations, etc.) has been extended
+as a community effort thanks to all our volunteers, collaborators
+and contributors listed here in alphabetical order:
+
+* @Henryfzh
+* @Leonard226
+* @Oseltamivir
+* @Submandarine
+* Resmi Arjun
+* Omar Benjelloun (Google)
+* Alice Cheng (Nvidia)
+* Jiahao Chen (MIT)
+* Ramesh N Chukka (Intel)
+* Ray DeMoss (One Stop Systems)
+* Ryan T DeRue (Purdue University)
+* Himanshu Dutta (Indian Institute of Technology)
+* Nicolas Essayan
+* Justin Faust (One Stop Systems)
+* Diane Feddema (Red Hat)
+* Leonid Fursin (United Silicon Carbide)
+* Anirban Ghosh (Nvidia)
+* James Goel (Qualcomm)
+* Michael Goin (Neural Magic)
+* Jose Armando Hernandez (Paris Saclay University)
+* Mehrdad Hessar (OctoML)
+* Miro Hodak (AMD)
+* Sachin Idgunji (Nvidia)
+* Tom Jablin (Google)
+* Nino Jacob
+* David Kanter (MLCommons)
+* Alex Karargyris
+* Jason Knight (OctoML)
+* Ilya Kozulin (Deelvin)
+* @makaveli10 (Collabora)
+* Steve Leak (NERSC)
+* Amija Maji (Purdue University)
+* Peter Mattson (Google, MLCommons)
+* Kasper Mecklenburg (Arm)
+* Pablo Gonzalez Mesa
+* Thierry Moreau (OctoML)
+* Sachin Mudaliyar
+* Stanley Mwangi (Microsoft)
+* Ashwin Nanjappa (Nvidia)
+* Hai Ah Nam (NERSC)
+* Nandeeka Nayak (UIUC)
+* Datta Nimmaturi (Nutanix)
+* Lakshman Patel
+* Arun Tejusve Raghunath Rajan (Cruise)
+* Vijay Janapa Reddi (Harvard University)
+* Andrew Reusch (OctoML)
+* Anandhu Sooraj (Kerala Technical University)
+* Sergey Serebryakov (HPE)
+* Warren Schultz (Principled Technologies)
+* Amrutha Sheleenderan (Kerala Technical University)
+* Micah J Sheller (Intel)
+* Byoungjun Seo (TTA)
+* Aditya Kumar Shaw (Indian Institute of Science)
+* Ilya Slavutin (Deelvin)
+* Jinho Suh (Nvidia)
+* Arjun Suresh
+* Badhri Narayanan Suresh (Intel)
+* David Tafur (MLCommons)
+* Chloe Tessier
+* Gaurav Verma (Stony Brook University)
+* Zixian Wang
+* Nathan Wasson
+* Scott Wasson (MLCommons)
+* Haoyang Zhang (UIUC)
+* Bojian Zheng (University of Toronto)
+* Thomas Zhu (Oxford University)
diff --git a/cmx4mlops/cmx4mlops/COPYRIGHT.txt b/cmx4mlops/cmx4mlops/COPYRIGHT.txt
new file mode 100644
index 000000000..ea868e078
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/COPYRIGHT.txt
@@ -0,0 +1,3 @@
+Copyright (c) 2021-2025 MLCommons
+
+Grigori Fursin, the cTuning foundation and OctoML donated this project to MLCommons to benefit everyone.
diff --git a/cmx4mlops/cmx4mlops/HISTORY.md b/cmx4mlops/cmx4mlops/HISTORY.md
new file mode 100644
index 000000000..4921bc0b9
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/HISTORY.md
@@ -0,0 +1,127 @@
+This document narrates the history of the creation and design of CM, CM4MLOps and MLPerf automations (also known as CK2)
+by [Grigori Fursin](https://cKnowledge.org/gfursin). It also highlights the donation of this open-source technology to MLCommons,
+aimed at benefiting the broader community and fostering its ongoing development as a collaborative, community-driven initiative:
+
+* Jan 28, 2021: After delivering an invited ACM TechTalk'21 about the Collective Knowledge framework (CK1)
+  and reproducibility initiatives for conferences, as well as CK-MLOps and MLPerf automations,
+  Grigori received useful feedback and suggestions for improvements to workflow automations:
+  https://learning.acm.org/techtalks/reproducibility.
+
+  Following this, Grigori began prototyping CK2 (later CM) to streamline CK1, CK-MLOps and MLPerf benchmarking. 
+  The goal was to dramatically simplify CK1 workflows by introducing just a few core and portable automations,
+  which eventually evolved into `CM script` and `CM cache`.
+
+  At that time, the cTuning foundation hosted CK1 and all the prototypes for the CM framework at https://github.com/ctuning/ck:
+  [ref1](https://github.com/mlcommons/ck/commit/9e57934f4999db23052531e92160772ab831463a),
+  [ref2](https://github.com/mlcommons/ck/tree/9e57934f4999db23052531e92160772ab831463a),
+  [ref3](https://github.com/mlcommons/ck/tree/9e57934f4999db23052531e92160772ab831463a/incubator).
+
+* Sep 23, 2021: donated CK1, CK-MLOps, MLPerf automations and early prototypes of CM from the cTuning repository to MLCommons:
+  [ref1](https://web.archive.org/web/20240803140223/https://octo.ai/blog/octoml-joins-the-community-effort-to-democratize-mlperf-inference-benchmarking),
+  [ref2](https://github.com/mlcommons/ck/tree/228f80b0bf44610c8244ff0c3f6bec5bbd25aa6c/incubator),
+  [ref3](https://github.com/mlcommons/ck/tree/695c3843fd8121bbdde6c453cd6ec9503986b0c6?tab=readme-ov-file#author-and-coordinator),
+  [ref4](https://github.com/mlcommons/ck/tree/master/ck),
+  [ref5](https://github.com/mlcommons/ck-mlops).
+
+  Prepared an MLCommons proposal for the creation of the [MLCommons Task Force on Automation and Reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md),
+  aimed at fostering community-driven support for CK and CM developments to benefit everyone.
+
+* Jan 2022: hired Arjun Suresh at OctoML to support and maintain the CK1 framework and help prepare OctoML's MLPerf submissions using CK1.
+  Meanwhile, transitioned to focusing on CM and CM-MLOps development, building upon the prototypes created in 2021.
+
+* Mar 1, 2022: started developing cm-mlops: [ref](https://github.com/octoml/cm-mlops/commit/0ae94736a420dfa84f7417fc62d323303b8760c6).
+
+* Mar 24, 2022: after successfully stabilizing the initial prototype of CM, donated it to MLCommons to benefit the entire community:
+  [ref1](https://github.com/mlcommons/ck/tree/c7918ad544f26b6c499c2fc9c07431a9640fca5a/ck2),
+  [ref2](https://github.com/mlcommons/ck/tree/c7918ad544f26b6c499c2fc9c07431a9640fca5a/ck2#coordinators),
+  [ref3](https://github.com/mlcommons/ck/commit/3c146cb3c75a015363f7a96758adf6dcc43032d6),
+  [ref4](https://github.com/mlcommons/ck/commit/3c146cb3c75a015363f7a96758adf6dcc43032d6#diff-d97f0f6f5a32f16d6ed18b9600ffc650f7b25512685f7a2373436c492c6b52b3R48).
+
+* Apr 6, 2022: started transitioning previous MLOps and MLPerf automations from the mlcommons/ck-mlops format
+  to the new CM format using the cm-mlops repository (later renamed to cm4mlops):
+  [ref1](https://github.com/octoml/cm-mlops/commit/d1efdc30fb535ce144020d4e88f3ed768c933176),
+  [ref2](https://github.com/octoml/cm-mlops/blob/d1efdc30fb535ce144020d4e88f3ed768c933176/CONTRIBUTIONS).
+
+* Apr 22, 2022: began architecting "Intelligent Components" in the CM-MLOps repository,
+  which were later renamed to `CM Script`:
+  [ref1](https://github.com/octoml/cm-mlops/commit/b335c609c47d2c547afe174d9df232652d57f4f8),
+  [ref2](https://github.com/octoml/cm-mlops/tree/b335c609c47d2c547afe174d9df232652d57f4f8),
+  [ref3](https://github.com/octoml/cm-mlops/blob/b335c609c47d2c547afe174d9df232652d57f4f8/CONTRIBUTIONS). 
+
+  At the same time, prototyped other core CM automations, including IC, Docker, and Experiment:
+  [ref1](https://github.com/octoml/cm-mlops/tree/b335c609c47d2c547afe174d9df232652d57f4f8/automation),
+  [ref2](https://github.com/mlcommons/ck/commits/master/?before=7f66e2438bfe21b4ce2d08326a5168bb9e3132f6+7001).
+
+* Apr 28, 2022: donated CM-MLOps (later renamed to CM4MLOps) to MLCommons:
+  [ref](https://github.com/mlcommons/ck/commit/456e4861056c0e39c4d689c03da91f90a44be058).
+
+* May 9, 2022: developed the initial set of core IC automations for MLOps (aka CM scripts):
+  [ref1](https://github.com/octoml/cm-mlops/commit/4a4a027f4088ce7e7abcec29c39d98981bf09d4c),
+  [ref2](https://github.com/octoml/cm-mlops/tree/4a4a027f4088ce7e7abcec29c39d98981bf09d4c),
+  [ref3](https://github.com/octoml/cm-mlops/blob/7692240becd6397a96c3975388913ea082002e7a/CONTRIBUTIONS).
+
+* May 11, 2022: after successfully prototyping CM and CM-MLOps, deprecated the CK1 framework in favor of CM.
+  Transferred Arjun Suresh to the CM project as a maintainer and tester for CM and CM-MLOps:
+  [ref](https://github.com/octoml/cm-mlops/blob/17405833665bc1e93820f9ff76deb28a0f543bdb/CONTRIBUTIONS).
+
+  Created a [file](https://github.com/mlcommons/ck/blob/master/cm-mlops/CHANGES.md)
+  to document and track our public developments at MLCommons.
+
+* Jun 8, 2022: renamed the 'IC' automation to the more intuitive 'CM script' automation:
+  [ref1](https://github.com/mlcommons/ck/tree/5ca4e2c33e58a660ac20a545d8aa5143ab6e8e81/cm-devops/automation/script),
+  [ref2](https://github.com/mlcommons/ck/tree/5ca4e2c33e58a660ac20a545d8aa5143ab6e8e81),
+  [ref3](https://github.com/octoml/cm-mlops/commit/7910fb7ffc62a617d987d2f887d6f9981ff80187).
+
+* Jun 16, 2022: prototyped the `CM cache` automation to facilitate caching and reuse of the outputs from CM scripts:
+  [ref1](https://github.com/mlcommons/ck/commit/1f81aae8cebd5567ec4ca55f693beaf32b49fb48),
+  [ref2](https://github.com/mlcommons/ck/tree/1f81aae8cebd5567ec4ca55f693beaf32b49fb48),
+  [ref3](https://github.com/mlcommons/ck/tree/1f81aae8cebd5567ec4ca55f693beaf32b49fb48?tab=readme-ov-file#contacts).
+
+* Sep 6, 2022: delivered a CM demo to run MLPerf while deprecating the CK1 automations for MLPerf:
+  [ref1](https://github.com/mlcommons/ck/commit/2c5d5c5c944ae5f252113c62af457c7a4c5e877a#diff-faac2c4ecfd0bfb928dafc938d3dad5651762fbb504a2544752a337294ee2573R224),
+  [ref2](https://github.com/mlcommons/ck/blob/2c5d5c5c944ae5f252113c62af457c7a4c5e877a/CONTRIBUTING.md#author-and-coordinator).
+
+  Welcomed Arjun Suresh as a contributor to CM automations for MLPerf: [ref](https://github.com/mlcommons/ck/blob/2c5d5c5c944ae5f252113c62af457c7a4c5e877a/CONTRIBUTING.md#contributors-in-alphabetical-order).
+
+* From September 2022: coordinated community development of CM and CM4MLOps
+  to [modularize and automate MLPerf benchmarks](https://docs.mlcommons.org/inference)
+  and support [reproducibility initiatives at ML and Systems conferences](https://cTuning.org/ae)
+  through the [MLCommons Task Force on Automation and Reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md).
+
+  * Directed and financed the creation of CM automations to streamline the MLPerf power measurement processes.
+
+  * Proposed to use MLPerf benchmarks for the Student Cluster Competition, led the development,
+    and prepared a tutorial to run MLPerf inference at SCC'22 via CM: [ref](https://github.com/mlcommons/ck/blob/master/docs/tutorials/sc22-scc-mlperf.md).
+
+* April 2023: departed OctoML to focus on the development of the [CK playground](https://access.cKnowledge.org) and CM automations
+  to make MLPerf accessible to everyone. Hired Arjun Suresh to help with development.
+
+  * Initiated and funded development of the [MLPerf explorer](https://github.com/ctuning/q2a-mlperf-visualizer)
+    to improve the visualization of results.
+
+* August 2023: organized the first mass-scale MLPerf community submission of 12217 inference benchmark v3.1 results
+  out of a total of 13351 results (including 90% of all power results) across diverse models, software and hardware
+  from different vendors via [open challenges](https://access.cknowledge.org/playground/?action=challenges) funded by cTuning.org:
+  [LinkedIn article](https://www.linkedin.com/pulse/new-milestone-make-mlperf-benchmarks-accessible-everyone-fursin/)
+  with results visualized by the [MLPerf explorer](https://github.com/ctuning/q2a-mlperf-visualizer),
+  [CM4MLOps challenges at GitHub](https://github.com/mlcommons/cm4mlops/tree/main/challenge).
+
+* February 2024: proposed to use CM to automate the [MLPerf automotive benchmark (ABTF)](https://mlcommons.org/working-groups/benchmarks/automotive/).
+
+  * Moved the prototypes of the CM automation for ABTF to the cm4abtf repo: [ref](https://github.com/mlcommons/cm4abtf/commit/f92b9f464de89a38a4bde149290dede2d94c8631).
+  * Led further CM4ABTF developments funded by cTuning.org.
+
+* Starting in April 2024, began the gradual transfer of ongoing maintenance and enhancement
+  responsibilities for CM and CM4MLOps, including MLPerf automations, to MLCommons.
+  Welcomed Anandhu Sooraj as a maintainer and contributor to CM4MLOps with MLPerf automations.
+
+* Took a break from all development activities.
+
+* July 2024: started prototyping the next generation of CM (CMX and CMX4MLOps) with simpler interfaces,
+  based on user feedback, while maintaining backward compatibility.
+
+* 2025: continue developing CMX and CMX4MLOps to make it easier to run and customize MLPerf inference, training
+  and other benchmarks across diverse models, datasets, software and hardware.
+
+For more details, please refer to the [white paper](https://arxiv.org/abs/2406.16791)
+and the [ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339).
diff --git a/cmx4mlops/cmx4mlops/LICENSE.md b/cmx4mlops/cmx4mlops/LICENSE.md
new file mode 100644
index 000000000..66a27ec5f
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/LICENSE.md
@@ -0,0 +1,177 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity.
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + diff --git a/cmx4mlops/cmx4mlops/LICENSE.third-party.md b/cmx4mlops/cmx4mlops/LICENSE.third-party.md new file mode 100644 index 000000000..faa008458 --- /dev/null +++ b/cmx4mlops/cmx4mlops/LICENSE.third-party.md @@ -0,0 +1 @@ +This CM repository may contain CM scripts with third-party files licensed under Apache2, BSD or MIT license. 
diff --git a/cmx4mlops/cmx4mlops/VERSION b/cmx4mlops/cmx4mlops/VERSION index 4e379d2bf..3eefcb9dd 100644 --- a/cmx4mlops/cmx4mlops/VERSION +++ b/cmx4mlops/cmx4mlops/VERSION @@ -1 +1 @@ -0.0.2 +1.0.0 diff --git a/cmx4mlops/cmx4mlops/cmr.yaml b/cmx4mlops/cmx4mlops/cmr.yaml index 0a67a3b12..4e2dbf480 100644 --- a/cmx4mlops/cmx4mlops/cmr.yaml +++ b/cmx4mlops/cmx4mlops/cmr.yaml @@ -3,12 +3,12 @@ uid: 428611a6db02407f git: true -version: "0.0.2" +version: "0.5.1" -author: "Grigori Fursin" +author: "Grigori Fursin and contributors" install_python_requirements: false -min_cm_version: "3.5.3" +min_cm_version: "3.5.2" prefix: repo diff --git a/cmx4mlops/cmx4mlops/repo/README.md b/cmx4mlops/cmx4mlops/repo/README.md index 49bd226a8..e69de29bb 100644 --- a/cmx4mlops/cmx4mlops/repo/README.md +++ b/cmx4mlops/cmx4mlops/repo/README.md @@ -1,67 +0,0 @@ -## Unified and cross-platform CM interface for DevOps, MLOps and MLPerf - -[![License](https://img.shields.io/badge/License-Apache%202.0-green)](LICENSE.md) -[![Python Version](https://img.shields.io/badge/python-3+-blue.svg)](https://github.com/mlcommons/ck/tree/master/cm/cmind) -[![Powered by CM](https://img.shields.io/badge/Powered_by-MLCommons%20CM-blue)](https://pypi.org/project/cmind). -[![Downloads](https://static.pepy.tech/badge/cm4mlops)](https://pepy.tech/project/cm4mlops) - -[![CM script automation features test](https://github.com/mlcommons/cm4mlops/actions/workflows/test-cm-script-features.yml/badge.svg)](https://github.com/mlcommons/cm4mlops/actions/workflows/test-cm-script-features.yml) -[![MLPerf inference bert (deepsparse, tf, onnxruntime, pytorch)](https://github.com/mlcommons/cm4mlops/actions/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml/badge.svg)](https://github.com/mlcommons/cm4mlops/actions/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml) -[![MLPerf inference MLCommons C++ ResNet50](https://github.com/mlcommons/cm4mlops/actions/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml/badge.svg)](https://github.com/mlcommons/cm4mlops/actions/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml) -[![MLPerf inference ABTF POC Test](https://github.com/mlcommons/cm4mlops/actions/workflows/test-mlperf-inference-abtf-poc.yml/badge.svg)](https://github.com/mlcommons/cm4mlops/actions/workflows/test-mlperf-inference-abtf-poc.yml) -[![Test Compilation of QAIC Compute SDK (build LLVM from src)](https://github.com/mlcommons/cm4mlops/actions/workflows/test-qaic-compute-sdk-build.yml/badge.svg)](https://github.com/mlcommons/cm4mlops/actions/workflows/test-qaic-compute-sdk-build.yml) -[![Test QAIC Software kit Compilation](https://github.com/mlcommons/cm4mlops/actions/workflows/test-qaic-software-kit.yml/badge.svg)](https://github.com/mlcommons/cm4mlops/actions/workflows/test-qaic-software-kit.yml) - - -# CM4MLOps repository - -**CM4MLOps** repository is powered by the [Collective Mind automation framework](https://github.com/mlcommons/ck/tree/master/cm), -a [Python package](https://pypi.org/project/cmind/) with a CLI and API designed for creating and managing automations. - -Two key automations developed using CM are **Script** and **Cache**, which streamline machine learning (ML) workflows, -including managing Docker runs. Both Script and Cache automations are part of the **cm4mlops** repository. 
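For context, a minimal sketch of how the Script automation described in this README is typically driven from Python, assuming only the `cmind` package's public `access()` entry point; the tags are illustrative, not a specific recipe:

```python
import cmind

# Run a CM script by tags; roughly equivalent to the CLI call
# `cm run script --tags=detect,os` used throughout the workflows above.
r = cmind.access({'action': 'run',
                  'automation': 'script',
                  'tags': 'detect,os',
                  'out': 'con'})

# Every access() call returns a dict with a 'return' code (0 on success).
if r['return'] > 0:
    raise RuntimeError(r.get('error', 'CM script run failed'))
```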
- -The CM scripts, also housed in this repository, consist of hundreds of modular Python-wrapped scripts accompanied -by `yaml` metadata, enabling the creation of robust and flexible ML workflows. - -- **CM Scripts Documentation**: [https://docs.mlcommons.org/cm4mlops/](https://docs.mlcommons.org/cm4mlops/) -- **CM CLI Documentation**: [https://docs.mlcommons.org/ck/specs/cm-cli/](https://docs.mlcommons.org/ck/specs/cm-cli/) - -The `mlperf-branch` of the **cm4mlops** repository is dedicated to developments specific to MLPerf Inference. -Please submit any pull requests (PRs) to this branch. For more information about using CM for MLPerf Inference, -refer to the [MLPerf Inference Documentation](https://docs.mlcommons.org/inference/). - -## License - -[Apache 2.0](LICENSE.md) - -## Copyright - -© 2022-2025 MLCommons. All Rights Reserved. - -Grigori Fursin, the cTuning foundation and OctoML donated the CK and CM projects to MLCommons to benefit everyone and encourage collaborative development. - -## Maintainer(s) - -* MLCommons - -## CM author - -[Grigori Fursin](https://cKnowledge.org/gfursin) - -## CM concepts - -Check our [ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) and the [white paper](https://arxiv.org/abs/2406.16791). - -## CM script developers - -Arjun Suresh, Anandhu Sooraj, Grigori Fursin - -## Parent project - -Visit the [parent Collective Knowledge project](https://github.com/mlcommons/ck) for further details. - -## Citing this project - -If you found the CM automations helpful, kindly reference this article: -[ [ArXiv](https://arxiv.org/abs/2406.16791) ] diff --git a/cmx4mlops/cmx4mlops/repo/automation/cache/module_misc.py b/cmx4mlops/cmx4mlops/repo/automation/cache/module_misc.py index d83d9f763..82b7c1cf1 100644 --- a/cmx4mlops/cmx4mlops/repo/automation/cache/module_misc.py +++ b/cmx4mlops/cmx4mlops/repo/automation/cache/module_misc.py @@ -1,5 +1,5 @@ # Author: Grigori Fursin -# Contributors: https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md +# Contributors: Arjun Suresh, Anandhu Sooraj # # Copyright: https://github.com/mlcommons/ck/blob/master/cm-mlops/COPYRIGHT.md # License: https://github.com/mlcommons/ck/blob/master/cm-mlops/LICENSE.md @@ -104,11 +104,9 @@ def copy_to_remote(i): cm_cache_path = os.path.realpath( os.path.join(cm_repos_path, "local", "cache")) - for key, val in new_env.items(): - - -if isinstance(val, if ) new_env[key] = val.replace( - cm_cache_path, remote_cm_cache_location) + for key,val in new_env.items(): + if type(val) == str and cm_cache_path in val: + new_env[key] = val.replace(cm_cache_path, remote_cm_cache_location) with open("tmp_remote_cached_state.json", "w") as f: json.dump(cm_cached_state, f, indent=2) diff --git a/cmx4mlops/cmx4mlops/repo/automation/cmx-demo/modulex.py b/cmx4mlops/cmx4mlops/repo/automation/cmx-demo/modulex.py index f63b0d44b..ac78286e1 100644 --- a/cmx4mlops/cmx4mlops/repo/automation/cmx-demo/modulex.py +++ b/cmx4mlops/cmx4mlops/repo/automation/cmx-demo/modulex.py @@ -3,14 +3,9 @@ # Author(s): Grigori Fursin # Contributor(s): # -# Copyright: https://github.com/mlcommons/ck/blob/master/COPYRIGHT.md +# Copyright: https://github.com/mlcommons/ck/blob/master/COPYRIGHT.txt # License: https://github.com/mlcommons/ck/blob/master/LICENSE.md -# -# White paper: https://arxiv.org/abs/2406.16791 -# History: https://github.com/mlcommons/ck/blob/master/HISTORY.CM.md -# -# CK and CM project contributors: https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md -# +# Project contributors: 
https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md import os @@ -81,14 +76,3 @@ def test(self, i): return {'return':0} - - ############################################################ - def run(self, i): - - import json - print (json.dumps(i, indent=2)) - - v = i.get('test', 'default') - v2 = i.get('test2', 'default') - - return {'return':0, 'new_key':v, 'new_key2':v2} diff --git a/cmx4mlops/cmx4mlops/repo/automation/experiment/module.py b/cmx4mlops/cmx4mlops/repo/automation/experiment/module.py index c83e7c049..db3b5da1e 100644 --- a/cmx4mlops/cmx4mlops/repo/automation/experiment/module.py +++ b/cmx4mlops/cmx4mlops/repo/automation/experiment/module.py @@ -280,8 +280,8 @@ def run(self, i): explore_keys.append(k) - -if not isinstance(v, if ) v = eval(v) + if type(v)!=list: + v=eval(v) explore_dimensions.append(v) diff --git a/cmx4mlops/cmx4mlops/repo/automation/script/module.py b/cmx4mlops/cmx4mlops/repo/automation/script/module.py index b08875892..5860e6e65 100644 --- a/cmx4mlops/cmx4mlops/repo/automation/script/module.py +++ b/cmx4mlops/cmx4mlops/repo/automation/script/module.py @@ -45,6 +45,7 @@ def __init__(self, cmind, automation_file): self.run_state['fake_deps'] = False self.run_state['parent'] = None self.run_state['version_info'] = [] + self.run_state['cache'] = False self.file_with_cached_state = 'cm-cached-state.json' @@ -87,7 +88,8 @@ def __init__(self, cmind, automation_file): 'accept_license', 'skip_system_deps', 'git_ssh', - 'gh_token'] + 'gh_token', + 'hf_token'] ############################################################ @@ -520,28 +522,7 @@ def _run(self, i): if os.environ.get(key, '') != '' and env.get(key, '') == '': env[key] = os.environ[key] - # Check path/input/output in input and pass to env - for key in self.input_flags_converted_to_tmp_env: - value = i.get(key, '').strip() - if value != '': - env['CM_TMP_' + key.upper()] = value - - for key in self.input_flags_converted_to_env: - value = i.get( - key, - '').strip() if isinstance( - i.get( - key, - ''), - str) else i.get( - key, - '') - if value: - env[f"CM_{key.upper()}"] = value - - r = update_env_with_values(env) - if r['return'] > 0: - return r + r = self._update_env_from_input(env, i) ####################################################################### # Check if we want to skip cache (either by skip_cache or by fake_run) @@ -860,6 +841,7 @@ def _run(self, i): 'alias', '') run_state['script_repo_git'] = script_artifact.repo_meta.get( 'git', False) + run_state['cache'] = meta.get('cache', False) if not recursion: run_state['script_entry_repo_to_report_errors'] = meta.get( @@ -1159,7 +1141,7 @@ def _run(self, i): # Check if the output of a selected script should be cached cache = False if i.get( 'skip_cache', - False) else meta.get( + False) else run_state.get( 'cache', False) cache = cache or ( @@ -1324,7 +1306,7 @@ def _run(self, i): r = self._call_run_deps(prehook_deps, self.local_env_keys, local_env_keys_from_meta, env, state, const, const_state, add_deps_recursive, recursion_spaces + extra_recursion_spaces, - remembered_selections, variation_tags_string, found_cached, debug_script_tags, verbose, show_time, extra_recursion_spaces, run_state) + remembered_selections, variation_tags_string, True, debug_script_tags, verbose, show_time, extra_recursion_spaces, run_state) if r['return'] > 0: return r @@ -1385,7 +1367,7 @@ def _run(self, i): r = self._call_run_deps(posthook_deps, self.local_env_keys, clean_env_keys_post_deps, env, state, const, const_state, add_deps_recursive, recursion_spaces + 
extra_recursion_spaces, - remembered_selections, variation_tags_string, found_cached, debug_script_tags, verbose, show_time, extra_recursion_spaces, run_state) + remembered_selections, variation_tags_string, True, debug_script_tags, verbose, show_time, extra_recursion_spaces, run_state) if r['return'] > 0: return r @@ -1396,7 +1378,7 @@ def _run(self, i): # Check chain of post dependencies on other CM scripts r = self._call_run_deps(post_deps, self.local_env_keys, clean_env_keys_post_deps, env, state, const, const_state, add_deps_recursive, recursion_spaces + extra_recursion_spaces, - remembered_selections, variation_tags_string, found_cached, debug_script_tags, verbose, show_time, extra_recursion_spaces, run_state) + remembered_selections, variation_tags_string, True, debug_script_tags, verbose, show_time, extra_recursion_spaces, run_state) if r['return'] > 0: return r @@ -1618,6 +1600,82 @@ def _run(self, i): if r['return'] > 0: return r + # Prepare common input to prepare and run script + run_script_input = { + 'path': path, + 'bat_ext': bat_ext, + 'os_info': os_info, + 'const': const, + 'state': state, + 'const_state': const_state, + 'reuse_cached': reuse_cached, + 'recursion': recursion, + 'recursion_spaces': recursion_spaces, + 'remembered_selections': remembered_selections, + 'tmp_file_run_state': self.tmp_file_run_state, + 'tmp_file_run_env': self.tmp_file_run_env, + 'tmp_file_state': self.tmp_file_state, + 'tmp_file_run': self.tmp_file_run, + 'local_env_keys': self.local_env_keys, + 'local_env_keys_from_meta': local_env_keys_from_meta, + 'posthook_deps': posthook_deps, + 'add_deps_recursive': add_deps_recursive, + 'remembered_selections': remembered_selections, + 'found_script_tags': found_script_tags, + 'variation_tags_string': variation_tags_string, + 'found_cached': False, + 'debug_script_tags': debug_script_tags, + 'verbose': verbose, + 'meta': meta, + 'self': self + } + + # Check and run predeps in customize.py + if str(meta.get('predeps', 'True')).lower() not in ["0", "false", "no"] and os.path.isfile( + path_to_customize_py): # possible duplicate execution - needs fix + r = utils.load_python_module( + {'path': path, 'name': 'customize'}) + if r['return'] > 0: + return r + + customize_code = r['code'] + + customize_common_input = { + 'input': i, + 'automation': self, + 'artifact': script_artifact, + 'customize': script_artifact.meta.get('customize', {}), + 'os_info': os_info, + 'recursion_spaces': recursion_spaces, + 'script_tags': script_tags, + 'variation_tags': variation_tags + } + run_script_input['customize_code'] = customize_code + run_script_input['customize_common_input'] = customize_common_input + + if repro_prefix != '': + run_script_input['repro_prefix'] = repro_prefix + if ignore_script_error: + run_script_input['ignore_script_error'] = True + if 'predeps' in dir(customize_code) and not fake_run: + + logging.debug( + recursion_spaces + + ' - Running preprocess ...') + + run_script_input['run_state'] = run_state + + ii = copy.deepcopy(customize_common_input) + ii['env'] = env + ii['state'] = state + ii['meta'] = meta + # may need to detect versions in multiple paths + ii['run_script_input'] = run_script_input + + r = customize_code.predeps(ii) + if r['return'] > 0: + return r + # Check chain of dependencies on other CM scripts if len(deps) > 0: logging.debug(recursion_spaces + @@ -1639,6 +1697,8 @@ def _run(self, i): # Clean some output files clean_tmp_files(clean_files, recursion_spaces) + # Repeated code + ''' # Prepare common input to prepare and run script 
run_script_input = { 'path': path, @@ -1668,6 +1728,7 @@ def _run(self, i): 'meta': meta, 'self': self } + ''' if os.path.isfile( path_to_customize_py): # possible duplicate execution - needs fix r = utils.load_python_module( @@ -1732,9 +1793,16 @@ def _run(self, i): tmp_curdir = os.getcwd() if env.get('CM_OUTDIRNAME', '') != '': - if not os.path.exists(env['CM_OUTDIRNAME']): - os.makedirs(env['CM_OUTDIRNAME']) - os.chdir(env['CM_OUTDIRNAME']) + if os.path.isabs(env['CM_OUTDIRNAME']) or recursion: + c_outdirname = env['CM_OUTDIRNAME'] + else: + c_outdirname = os.path.join( + env['CM_TMP_CURRENT_PATH'], env['CM_OUTDIRNAME']) + env['CM_OUTDIRNAME'] = c_outdirname + + if not os.path.exists(c_outdirname): + os.makedirs(c_outdirname) + os.chdir(c_outdirname) # Check if pre-process and detect if 'preprocess' in dir(customize_code) and not fake_run: @@ -2128,29 +2196,6 @@ def _run(self, i): if print_readme or repro_prefix != '': readme = self._get_readme(cmd, run_state) - # Copy Docker sample - if repro_prefix != '' and repro_dir != '': - docker_template_path = os.path.join( - self.path, 'docker_repro_example') - if os.path.isdir(docker_template_path): - try: - - shutil.copytree( - docker_template_path, - repro_dir, - dirs_exist_ok=True) - except Exception as e: - pass - - docker_container = self._get_docker_container(cmd, run_state) - - try: - - with open(os.path.join(repro_dir, 'ubuntu-23.04.Dockerfile'), 'a+') as f: - f.write(docker_container) - except BaseException: - pass - if print_readme: with open('README-cm.md', 'w') as f: f.write(readme) @@ -2223,6 +2268,34 @@ def _run(self, i): return rr + ########################################################################## + + def _update_env_from_input(self, env, i): + # Check path/input/output in input and pass to env + for key in self.input_flags_converted_to_tmp_env: + value = i.get(key, '').strip() + if value != '': + env['CM_TMP_' + key.upper()] = value + + for key in self.input_flags_converted_to_env: + value = i.get( + key, + '').strip() if isinstance( + i.get( + key, + ''), + str) else i.get( + key, + '') + if value: + env[f"CM_{key.upper()}"] = value + + r = update_env_with_values(env) + if r['return'] > 0: + return r + + return {'return': 0} + ########################################################################## def _fix_cache_paths(self, env): cm_repos_path = os.environ.get( @@ -2312,7 +2385,6 @@ def _update_state_from_variations(self, i, meta, variation_tags, variations, env run_state['variation_groups'] = variation_groups # Add variation(s) if specified in the "tags" input prefixed by _ - # If there is only 1 default variation, then just use it or # substitute from CMD @@ -2828,7 +2900,6 @@ def search(self, i): # Print filtered paths if console if console: for script in r['list']: - # This should not be logging since the output can be consumed by other external tools and scripts # logging.info(script.path) print(script.path) @@ -3729,26 +3800,23 @@ def _get_readme(self, cmd_parts, run_state): content = '' content += """ -*This README was automatically generated by the [CM framework](https://github.com/mlcommons/ck).* +*This README was automatically generated.* ## Install CM ```bash -pip install cmind -U +pip install cm4mlops ``` Check [this readme](https://github.com/mlcommons/ck/blob/master/docs/installation.md) with more details about installing CM and dependencies across different platforms (Ubuntu, MacOS, Windows, RHEL, ...). 
-## Install CM automation repositories - -```bash -cm pull repo mlcommons@cm4mlops --checkout=dev """ current_cm_repo = run_state['script_repo_alias'] - if current_cm_repo not in ['mlcommons@ck', 'mlcommons@cm4mlops']: + if current_cm_repo not in [ + 'mlcommons@mlperf-automations', 'mlcommons@cm4mlops']: content += '\ncm pull repo ' + \ run_state['script_repo_alias'] + '\n' @@ -3788,57 +3856,6 @@ def _get_readme(self, cmd_parts, run_state): return content - ########################################################################## - def _get_docker_container(self, cmd_parts, run_state): - """ - Outputs a Markdown README file listing the CM run commands for the dependencies - """ - - deps = run_state['deps'] - - version_info = run_state.get('version_info', []) - version_info_dict = {} - - for v in version_info: - k = list(v.keys())[0] - version_info_dict[k] = v[k] - - content = '' - - content += """ - -# The following CM commands were automatically generated (prototype) - -cm pull repo mlcommons@cm4mlops --checkout=dev - -""" - current_cm_repo = run_state['script_repo_alias'] - if current_cm_repo not in ['mlcommons@ck', 'mlcommons@cm4mlops']: - content += '\ncm pull repo ' + \ - run_state['script_repo_alias'] + '\n\n' - - deps_ = '' - - for dep_tags in deps: - - xversion = '' - version = version_info_dict.get(dep_tags, {}).get('version', '') - if version != '': - xversion = ' --version={}\n'.format(version) - - content += "# cm run script --tags=" + \ - dep_tags + "{}\n\n".format(xversion) - - cmd = "cm run script " - - for cmd_part in cmd_parts: - x = '"' if ' ' in cmd_part and not cmd_part.startswith('-') else '' - cmd = cmd + " " + x + cmd_part + x - - content += cmd + '\n' - - return content - ########################################################################## def _print_versions(self, run_state): @@ -4130,7 +4147,6 @@ def find_file_in_paths(self, i): return rx else: # Version was detected - detected_version = rx.get('version', '') if detected_version != '': @@ -4693,41 +4709,6 @@ def doc(self, i): return utils.call_internal_module( self, __file__, 'module_misc', 'doc', i) - ############################################################ - def gui(self, i): - """ - Run GUI for CM script. 
- - Args: - (CM input dict): - - Returns: - (CM return dict): - - * return (int): return code == 0 if no error and >0 if error - * (error) (str): error string if return>0 - - """ - - artifact = i.get('artifact', '') - tags = '' - if artifact != '': - if ' ' in artifact: - tags = artifact.replace(' ', ',') - - if tags == '': - tags = i.get('tags', '') - - if 'tags' in i: - del (i['tags']) - - i['action'] = 'run' - i['artifact'] = 'gui' - i['parsed_artifact'] = [('gui', '605cac42514a4c69')] - i['script'] = tags.replace(',', ' ') - - return self.cmind.access(i) - ############################################################ def dockerfile(self, i): @@ -4795,7 +4776,7 @@ def docker(self, i): (docker_os_version) (str): force docker OS version (default: 22.04) (docker_image_tag_extra) (str): add extra tag (default:-latest) - (docker_cm_repo) (str): force CM automation repository when building Docker (default: cm4mlops) + (docker_cm_repo) (str): force CM automation repository when building Docker (default: mlperf-automations) (docker_cm_repos) (docker_cm_repo_flags) @@ -5541,7 +5522,6 @@ def prepare_and_run_script_with_postprocessing(i, postprocess="postprocess"): if rc > 0 and not i.get('ignore_script_error', False): # Check if print files when error print_files = meta.get('print_files_if_script_error', []) - if len(print_files) > 0: for pr in print_files: if os.path.isfile(pr): @@ -5567,20 +5547,13 @@ def prepare_and_run_script_with_postprocessing(i, postprocess="postprocess"): script_repo_alias.replace('@', '/') + '/issues' if repo_to_report == '': - repo_to_report = 'https://github.com/mlcommons/cm4mlops/issues' + repo_to_report = 'https://github.com/mlcommons/mlperf-automations/issues' note = ''' ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Note that it is often a portability issue of a third-party tool or a native script -wrapped and unified by this CM script (automation recipe). Please re-run -this script with --repro flag and report this issue with the original -command line, cm-repro directory and full log here: - -{} - -The CM concept is to collaboratively fix such issues inside portable CM scripts -to make existing tools and native scripts more portable, interoperable -and deterministic. Thank you'''.format(repo_to_report) +Please file an issue at {} along with the full CM command being run and the relevant +or full console log. 
+'''.format(repo_to_report) rr = { 'return': 2, @@ -5800,7 +5773,10 @@ def convert_env_to_script(env, os_info, start_script=None): key = key[1:] # Append the existing environment variable to the new value - env_value = f"{env_separator.join(env_value)}{env_separator}{os_info['env_var'].replace('env_var', key)}" + env_value = f"""{ + env_separator.join(env_value)}{env_separator}{ + os_info['env_var'].replace( + 'env_var', key)}""" # Replace placeholders in the platform-specific environment command env_command = os_info['set_env'].replace( @@ -6086,6 +6062,9 @@ def update_state_from_meta(meta, env, state, const, const_state, deps, post_deps Internal: update env and state from meta """ + if meta.get('cache', '') != '': + run_state['cache'] = meta['cache'] + default_env = meta.get('default_env', {}) for key in default_env: env.setdefault(key, default_env[key]) diff --git a/cmx4mlops/cmx4mlops/repo/automation/script/module_misc.py b/cmx4mlops/cmx4mlops/repo/automation/script/module_misc.py index 22b4cf2fd..00883ba05 100644 --- a/cmx4mlops/cmx4mlops/repo/automation/script/module_misc.py +++ b/cmx4mlops/cmx4mlops/repo/automation/script/module_misc.py @@ -1647,12 +1647,12 @@ def dockerfile(i): 'docker_cm_repo', docker_settings.get( 'cm_repo', - 'mlcommons@cm4mlops')) + 'mlcommons@mlperf-automations')) cm_repo_branch = i.get( 'docker_cm_repo_branch', docker_settings.get( 'cm_repo_branch', - 'mlperf-inference')) + 'main')) cm_repo_flags = i.get( 'docker_cm_repo_flags', @@ -1915,6 +1915,9 @@ def docker(i): noregenerate_docker_file = i.get('docker_noregenerate', False) norecreate_docker_image = i.get('docker_norecreate', True) + recreate_docker_image = i.get('docker_recreate', False) + if recreate_docker_image: # force recreate + norecreate_docker_image = False if i.get('docker_skip_build', False): noregenerate_docker_file = True @@ -1987,8 +1990,6 @@ def docker(i): env['CM_DOCKER_CACHE'] = docker_cache image_repo = i.get('docker_image_repo', '') - if image_repo == '': - image_repo = 'local' # Host system needs to have docker r = self_module.cmind.access({'action': 'run', @@ -2084,6 +2085,14 @@ def docker(i): continue ''' + r = script_automation._update_env_from_input(env, i) + if r['return'] > 0: + return r + + # mount outdirname path + if env.get('CM_OUTDIRNAME', '') != '': + mounts.append(f"""{env['CM_OUTDIRNAME']}:{env['CM_OUTDIRNAME']}""") + # Check if need to update/map/mount inputs and env r = process_inputs({'run_cmd_arc': i_run_cmd_arc, 'docker_settings': docker_settings, @@ -2174,7 +2183,7 @@ def docker(i): # env keys corresponding to container mounts are explicitly passed to # the container run cmd - container_env_string = '' + container_env = {} for index in range(len(mounts)): mount = mounts[index] # Since windows may have 2 :, we search from the right @@ -2216,7 +2225,6 @@ def docker(i): new_container_mount, new_container_mount_env = get_container_path( env[tmp_value]) container_env_key = new_container_mount_env - # container_env_string += " --env.{}={} ".format(tmp_value, new_container_mount_env) else: # we skip those mounts mounts[index] = None skip = True @@ -2228,8 +2236,7 @@ def docker(i): continue mounts[index] = new_host_mount + ":" + new_container_mount if host_env_key: - container_env_string += " --env.{}={} ".format( - host_env_key, container_env_key) + container_env[host_env_key] = container_env_key for v in docker_input_mapping: if docker_input_mapping[v] == host_env_key: @@ -2260,10 +2267,16 @@ def docker(i): for key in proxy_keys: if os.environ.get(key, '') != '': value = 
os.environ[key] - container_env_string += " --env.{}={} ".format(key, value) + container_env[key] = value env['+ CM_DOCKER_BUILD_ARGS'].append( "{}={}".format(key, value)) + if container_env: + if not i_run_cmd.get('env'): + i_run_cmd['env'] = container_env + else: + i_run_cmd['env'] = {**i_run_cmd['env'], **container_env} + docker_use_host_group_id = i.get( 'docker_use_host_group_id', docker_settings.get('use_host_group_id')) @@ -2308,7 +2321,7 @@ def docker(i): 'docker_cm_repo', docker_settings.get( 'cm_repo', - 'mlcommons@cm4mlops')) + 'mlcommons@mlperf-automations')) docker_path = i.get('docker_path', '').strip() if docker_path == '': @@ -2405,8 +2418,7 @@ def docker(i): 'docker_run_cmd_prefix': i.get('docker_run_cmd_prefix', '')}) if r['return'] > 0: return r - run_cmd = r['run_cmd_string'] + ' ' + \ - container_env_string + ' --docker_run_deps ' + run_cmd = r['run_cmd_string'] + ' ' + ' --docker_run_deps ' env['CM_RUN_STATE_DOCKER'] = True @@ -2422,7 +2434,8 @@ def docker(i): print(final_run_cmd) print('') - docker_recreate_image = 'yes' if not norecreate_docker_image else 'no' + docker_recreate_image = 'yes' if str(norecreate_docker_image).lower() not in [ + "yes", "true", "1"] else 'no' if i.get('docker_push_image', '') in ['True', True, 'yes']: env['CM_DOCKER_PUSH_IMAGE'] = 'yes' @@ -2436,10 +2449,8 @@ def docker(i): 'docker_os_version': docker_os_version, 'cm_repo': cm_repo, 'env': env, - 'image_repo': image_repo, 'interactive': interactive, 'mounts': mounts, - 'image_name': image_name, # 'image_tag': script_alias, 'image_tag_extra': image_tag_extra, 'detached': detached, @@ -2456,6 +2467,12 @@ def docker(i): } } + if image_repo: + cm_docker_input['image_repo'] = image_repo + + if image_name: + cm_docker_input['image_name'] = image_name + if all_gpus: cm_docker_input['all_gpus'] = True diff --git a/cmx4mlops/cmx4mlops/repo/automation/utils/module_cfg.py b/cmx4mlops/cmx4mlops/repo/automation/utils/module_cfg.py index e3a445c12..497822808 100644 --- a/cmx4mlops/cmx4mlops/repo/automation/utils/module_cfg.py +++ b/cmx4mlops/cmx4mlops/repo/automation/utils/module_cfg.py @@ -242,10 +242,11 @@ def process_base(meta, full_path): else: base[k] = meta[k] - -elif isinstance(v, elif) for vv in v: + elif type(v) == list: + for vv in v: base[k].append(vv) -elif isinstance(v, elif ) base[k].merge(v) + elif type(v) == dict: + base[k].merge(v) meta = base diff --git a/cmx4mlops/cmx4mlops/repo/script/README.md b/cmx4mlops/cmx4mlops/repo/script/README.md index d2667369c..a9e5e4145 100644 --- a/cmx4mlops/cmx4mlops/repo/script/README.md +++ b/cmx4mlops/cmx4mlops/repo/script/README.md @@ -1,40 +1,13 @@ -## About +### About -Portable CM automations for MLOps and MLPerf. +This is a source code of portable and reusable automation recipes +from MLCommons projects with a [human-friendly CM interface](https://github.com/mlcommons/ck) - +you can find a human-readable catalog of these automation recipes [here](../../docs/list_of_scripts.md). -## License +### License [Apache 2.0](../../LICENSE.md) -## Copyright +### Copyright -© 2022-2025 MLCommons. All Rights Reserved. - -Grigori Fursin, the cTuning foundation and OctoML donated the CK and CM projects to MLCommons to benefit everyone. - -This file is licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. 
A copy of the License can be obtained at: - -[Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) - -Unless required by applicable law or agreed to in writing, software distributed under the License is provided on an "AS IS" basis, without warranties or conditions of any kind, either express or implied. Please refer to the License for the specific language governing permissions and limitations under the License. - -## Maintainer(s) - -* MLCommons - -## CM author - -[Grigori Fursin](https://cKnowledge.org/gfursin) - -## CM script developers - -Arjun Suresh, Anandhu Sooraj, Grigori Fursin - -## Parent project - -Visit the [parent Collective Knowledge project](https://github.com/mlcommons/ck) for further details. - -## Citing this project - -If you found the CM automations helpful, kindly reference this article: -[ [ArXiv](https://arxiv.org/abs/2406.16791) ] +2022-2024 [MLCommons](https://mlcommons.org) diff --git a/cmx4mlops/cmx4mlops/repo/script/app-image-classification-onnx-py/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/app-image-classification-onnx-py/_cm.yaml index 740a8a18a..e53b91ec2 100644 --- a/cmx4mlops/cmx4mlops/repo/script/app-image-classification-onnx-py/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/app-image-classification-onnx-py/_cm.yaml @@ -22,7 +22,6 @@ default_env: deps: - tags: detect,os -#- tags: get,sys-utils-cm - names: - python - python3 diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/COPYRIGHT.md b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/COPYRIGHT.md new file mode 100644 index 000000000..a059b0c49 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/COPYRIGHT.md @@ -0,0 +1,9 @@ +# Copyright Notice + +© 2024-2025 MLCommons. All Rights Reserved. + +This file is licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. A copy of the License can be obtained at: + +[Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) + +Unless required by applicable law or agreed to in writing, software distributed under the License is provided on an "AS IS" basis, without warranties or conditions of any kind, either express or implied. Please refer to the License for the specific language governing permissions and limitations under the License. 
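The `_cm.yaml` added below maps script inputs (e.g. `--count`, `--mode`) to CM environment variables through its `input_mapping` section. A simplified sketch of what that mapping might amount to at run time, as an assumption for illustration; the actual resolution lives in `automation/script/module.py` alongside defaults, variations and const env handling:

```python
# Hypothetical helper (illustration only): apply an `input_mapping`
# like the one declared in the _cm.yaml below, e.g.
#   count: CM_MLPERF_LOADGEN_QUERY_COUNT
#   mode:  CM_MLPERF_LOADGEN_MODE
def apply_input_mapping(inputs, input_mapping, env):
    # inputs: parsed script flags such as {'count': 10, 'mode': 'accuracy'}
    for flag, env_key in input_mapping.items():
        if flag in inputs:
            env[env_key] = str(inputs[flag])
    return env


env = apply_input_mapping(
    {'count': 10, 'mode': 'accuracy'},
    {'count': 'CM_MLPERF_LOADGEN_QUERY_COUNT',
     'mode': 'CM_MLPERF_LOADGEN_MODE'},
    {})
assert env == {'CM_MLPERF_LOADGEN_QUERY_COUNT': '10',
               'CM_MLPERF_LOADGEN_MODE': 'accuracy'}
```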
diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/README-extra.md b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/README-extra.md new file mode 100644 index 000000000..582991f6d --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/README-extra.md @@ -0,0 +1 @@ +# CM script diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/_cm.yaml new file mode 100644 index 000000000..b22f119d6 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/_cm.yaml @@ -0,0 +1,469 @@ +alias: app-mlperf-automotive-mlcommons-python +uid: 621240c5d30a437c + +automation_alias: script +automation_uid: 5b4e0237da074764 + +category: "Modular MLPerf inference benchmark pipeline for ABTF model" + + +# User-friendly tags to find this CM script +tags: +- demo +- run-mlperf-inference +- object-detection +- abtf-model + + +# Default environment +default_env: + CM_MLPERF_LOADGEN_MODE: accuracy + CM_MLPERF_LOADGEN_SCENARIO: Offline + CM_MLPERF_LOADGEN_BUILD_FROM_SRC: 'on' + CM_OUTPUT_FOLDER_NAME: test_results + CM_MLPERF_RUN_STYLE: test + CM_TEST_QUERY_COUNT: '10' + CM_MLPERF_QUANTIZATION: off + CM_MLPERF_SUT_NAME_IMPLEMENTATION_PREFIX: reference + CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX: '' + + +# Map script inputs to environment variables +input_mapping: + device: CM_MLPERF_DEVICE + count: CM_MLPERF_LOADGEN_QUERY_COUNT + docker: CM_RUN_DOCKER_CONTAINER + hw_name: CM_HW_NAME + imagenet_path: IMAGENET_PATH + max_batchsize: CM_MLPERF_LOADGEN_MAX_BATCHSIZE + mode: CM_MLPERF_LOADGEN_MODE + num_threads: CM_NUM_THREADS + threads: CM_NUM_THREADS + dataset: CM_MLPERF_VISION_DATASET_OPTION + model: CM_MLPERF_CUSTOM_MODEL_PATH + output_dir: OUTPUT_BASE_DIR + power: CM_MLPERF_POWER + power_server: CM_MLPERF_POWER_SERVER_ADDRESS + ntp_server: CM_MLPERF_POWER_NTP_SERVER + max_amps: CM_MLPERF_POWER_MAX_AMPS + max_volts: CM_MLPERF_POWER_MAX_VOLTS + regenerate_files: CM_REGENERATE_MEASURE_FILES + rerun: CM_RERUN + scenario: CM_MLPERF_LOADGEN_SCENARIO + test_query_count: CM_TEST_QUERY_COUNT + clean: CM_MLPERF_CLEAN_SUBMISSION_DIR + dataset_args: CM_MLPERF_EXTRA_DATASET_ARGS + target_qps: CM_MLPERF_LOADGEN_TARGET_QPS + target_latency: CM_MLPERF_LOADGEN_TARGET_LATENCY + offline_target_qps: CM_MLPERF_LOADGEN_OFFLINE_TARGET_QPS + server_target_qps: CM_MLPERF_LOADGEN_SERVER_TARGET_QPS + singlestream_target_latency: CM_MLPERF_LOADGEN_SINGLESTREAM_TARGET_LATENCY + multistream_target_latency: CM_MLPERF_LOADGEN_MULTISTREAM_TARGET_LATENCY + output: CM_MLPERF_OUTPUT_DIR + +# Duplicate CM environment variables to the ones used in native apps +env_key_mappings: + CM_HOST_: HOST_ + CM_ML_: ML_ + CM_MLPERF_TVM: MLPERF_TVM + CM_MLPERF_DELETE: MLPERF_DELETE + +# Env keys which are exposed to higher level scripts +new_env_keys: + - CM_MLPERF_* + - CM_DATASET_* + - CM_HW_NAME + - CM_COGNATA_ACCURACY_DUMP_FILE + - CM_OUTPUT_PREDICTIONS_PATH + - CM_ML_MODEL_* + - CM_MAX_EXAMPLES + +new_state_keys: + - mlperf-inference-implementation + - CM_SUT_* + +# Dependencies on other CM scripts +deps: + + # Detect host OS features + - tags: detect,os + + # Detect host CPU features + - tags: detect,cpu + + # Install system dependencies on a given host + - tags: get,sys-utils-cm + + # Detect/install python + - tags: get,python + names: + - python + - python3 + + # Use cmind inside CM scripts + - tags: get,generic-python-lib,_package.cmind 
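  # Note (inferred from this file's own usage, not an official spec): a
  # dependency guarded by `enable_if_env` is included only when every listed
  # env key holds one of the listed values, e.g. the CUDA dependency right
  # below is pulled in only when USE_CUDA is "yes"; `skip_if_env` is the
  # inverse, and `names` lets callers retarget a dependency from the command
  # line via flags such as `--adr.<name>.tags=...`.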
+ + + # CUDA + - tags: get,cuda + enable_if_env: + USE_CUDA: + - yes + names: + - cuda + + + + ######################################################################## + # Install ML engines via CM + + ## Onnx CPU Runtime + - tags: get,generic-python-lib,_onnxruntime + names: + - ml-engine-onnxruntime + - onnxruntime + enable_if_env: + CM_MLPERF_BACKEND: + - onnxruntime + - tvm-onnx + CM_MLPERF_DEVICE: + - cpu + - rocm + + ## Onnx CUDA Runtime + - tags: get,generic-python-lib,_onnxruntime_gpu + names: + - ml-engine-onnxruntime-cuda + enable_if_env: + CM_MLPERF_BACKEND: + - onnxruntime + - tvm-onnx + CM_MLPERF_DEVICE: + - gpu + skip_if_env: + CM_MODEL: + - 3d-unet-99 + - 3d-unet-99.9 + + ## resnet50 and 3d-unet need both onnxruntime and onnxruntime_gpu on cuda + - tags: get,generic-python-lib,_onnxruntime + enable_if_env: + CM_MLPERF_BACKEND: + - onnxruntime + CM_MLPERF_DEVICE: + - gpu + CM_MODEL: + - 3d-unet-99 + - 3d-unet-99.9 + - resnet50 + - tags: get,generic-python-lib,_onnxruntime_gpu + env: + CM_GENERIC_PYTHON_PIP_UNINSTALL_DEPS: "" + enable_if_env: + CM_MLPERF_BACKEND: + - onnxruntime + CM_MLPERF_DEVICE: + - gpu + CM_MODEL: + - 3d-unet-99 + - 3d-unet-99.9 + - resnet50 + + ## Pytorch (CPU) + - tags: get,generic-python-lib,_torch + names: + - ml-engine-pytorch + - pytorch + enable_if_env: + CM_MLPERF_BACKEND: + - pytorch + - tvm-pytorch + CM_MLPERF_DEVICE: + - cpu + - rocm + + ## Pytorch (CUDA) + - tags: get,generic-python-lib,_torch_cuda + names: + - ml-engine-pytorch + - pytorch + enable_if_env: + CM_MLPERF_BACKEND: + - pytorch + - tvm-pytorch + - ray + CM_MLPERF_DEVICE: + - gpu + + ## Torchvision (CPU) + - tags: get,generic-python-lib,_torchvision + names: + - ml-engine-torchvision + enable_if_env: + CM_MLPERF_BACKEND: + - pytorch + - tvm-pytorch + CM_MLPERF_DEVICE: + - cpu + + ## Torchvision (CUDA) + - tags: get,generic-python-lib,_torchvision_cuda + names: + - ml-engine-torchvision + enable_if_env: + CM_MLPERF_BACKEND: + - pytorch + - tvm-pytorch + - ray + CM_MLPERF_DEVICE: + - gpu + + ## tensorrt + - tags: get,generic-python-lib,_tensorrt + names: + - ml-engine-tensorrt + enable_if_env: + CM_MLPERF_BACKEND: + - ray + + ## torch_tensorrt + - tags: get,generic-python-lib,_torch_tensorrt + names: + - ml-engine-torch_tensorrt + enable_if_env: + CM_MLPERF_BACKEND: + - ray + + ## Ray + - tags: get,generic-python-lib,_ray + names: + - ray + enable_if_env: + CM_MLPERF_BACKEND: + - ray + + + + ## Tensorflow + - tags: get,generic-python-lib,_tensorflow + names: + - ml-engine-tensorflow + - tensorflow + enable_if_env: + CM_MLPERF_BACKEND: + - tf + - tflite + + # Install MLPerf inference dependencies + + + + # Creates user conf for given SUT + - tags: generate,user-conf,mlperf,inference + names: + - user-conf-generator + + + # Install MLPerf loadgen + - tags: get,generic-python-lib,_package.mlcommons-loadgen + enable_if_env: + CM_MLPERF_LOADGEN_BUILD_FROM_SRC: + - "off" + names: + - loadgen + - mlperf-inference-loadgen + + - tags: get,loadgen + enable_if_any_env: + CM_MLPERF_LOADGEN_BUILD_FROM_SRC: + - "on" + names: + - loadgen + - mlperf-inference-loadgen + - mlperf-inference-loadgen-from-src + + +# +# # Download MLPerf inference source +# - tags: get,mlcommons,inference,src +# env: +# CM_GET_MLPERF_IMPLEMENTATION_ONLY: 'yes' +# names: +# - mlperf-implementation + + - tags: get,generic-python-lib,_package.psutil + + + + +prehook_deps: + - names: + - remote-run-cmds + tags: remote,run,cmds + enable_if_env: + CM_ASSH_RUN_COMMANDS: + - "on" + + + +posthook_deps: + - names: + - mlperf-runner + 
tags: benchmark-mlperf + skip_if_env: + CM_MLPERF_SKIP_RUN: + - "on" + + +post_deps: + - tags: save,mlperf,inference,state + names: + - save-mlperf-inference-state + + +docker: + real_run: false + +# Variations to customize dependencies +variations: + # Implementation + python: + group: implementation + default: true + env: + CM_MLPERF_PYTHON: 'yes' + CM_MLPERF_IMPLEMENTATION: reference + + + # ML engine + onnxruntime: + group: framework + env: + CM_MLPERF_BACKEND: onnxruntime + + onnxruntime,cpu: + env: + CM_MLPERF_BACKEND_VERSION: <<>> + + onnxruntime,cuda: + env: + CM_MLPERF_BACKEND_VERSION: <<>> + ONNXRUNTIME_PREFERRED_EXECUTION_PROVIDER: "CUDAExecutionProvider" + + + pytorch: + group: framework + default: true + add_deps_recursive: + imagenet-preprocessed: + tags: _NCHW + openimages-preprocessed: + tags: _NCHW + ml-model: + tags: raw,_pytorch + env: + CM_MLPERF_BACKEND: pytorch + CM_MLPERF_BACKEND_VERSION: <<>> + + + + +# retinanet: +# group: models +# deps: +# - tags: get,generic-python-lib,_opencv-python +# - tags: get,generic-python-lib,_numpy +# - tags: get,generic-python-lib,_pycocotools +# +# env: +# CM_MODEL: retinanet +# CM_MLPERF_USE_MLCOMMONS_RUN_SCRIPT: 'yes' +# CM_MLPERF_LOADGEN_MAX_BATCHSIZE: '1' + + + abtf-demo-model: + group: models + deps: + - tags: get,generic-python-lib,_opencv-python + - tags: get,generic-python-lib,_numpy + - tags: get,generic-python-lib,_pycocotools + - tags: get,generic-python-lib,_package.torchmetrics + - tags: get,generic-python-lib,_package.faster-coco-eval + version_max: "1.5.7" + version_max_usable: "1.5.7" + names: + - cocoeval + - tags: get,dataset,raw,mlcommons-cognata + names: + - raw-dataset-mlcommons-cognata + - tags: get,ml-model,abtf-ssd-pytorch,_abtf-mvp + names: + - ml-model-abtf + + env: + CM_MODEL: retinanet + + abtf-poc-model: + group: models + default: true + deps: + - tags: get,generic-python-lib,_opencv-python + - tags: get,generic-python-lib,_numpy + version_max: "1.26.4" + version_max_usable: "1.26.4" + - tags: get,generic-python-lib,_pycocotools + - tags: get,generic-python-lib,_package.torchmetrics + - tags: get,generic-python-lib,_package.faster-coco-eval + version_max: "1.5.7" + version_max_usable: "1.5.7" + names: + - cocoeval + - tags: get,dataset,raw,mlcommons-cognata,_abtf-poc + skip_if_env: + CM_RUN_STATE_DOCKER: + - 'yes' + names: + - raw-dataset-mlcommons-cognata + - tags: get,ml-model,abtf-ssd-pytorch,_abtf-poc + names: + - ml-model-abtf + + env: + CM_MODEL: retinanet + + # Target devices + cpu: + group: device + default: true + env: + CM_MLPERF_DEVICE: cpu + CUDA_VISIBLE_DEVICES: '' + USE_CUDA: no + USE_GPU: no + + cuda: + group: device + env: + CM_MLPERF_DEVICE: gpu + USE_CUDA: yes + USE_GPU: yes + + + + # Loadgen scenarios + offline: + env: + CM_MLPERF_LOADGEN_SCENARIO: Offline + multistream: + env: + CM_MLPERF_LOADGEN_SCENARIO: MultiStream + singlestream: + env: + CM_MLPERF_LOADGEN_SCENARIO: SingleStream + CM_MLPERF_LOADGEN_MAX_BATCHSIZE: 1 + server: + env: + CM_MLPERF_LOADGEN_SCENARIO: Server + + mvp_demo: + env: diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/customize.py b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/customize.py new file mode 100644 index 000000000..58ee04e1f --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/customize.py @@ -0,0 +1,253 @@ +from cmind import utils +import os +import json +import shutil +import subprocess + + +def preprocess(i): + + os_info = i['os_info'] + env = i['env'] 
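    # Note (inferred from the fields this file accesses, not an official
    # spec): CM calls preprocess(i) with a single dict, where i['env'] is the
    # mutable unified environment shared across scripts, i['state'] carries
    # structured cross-script state, and i['run_script_input']['path'] points
    # to this script's directory; returning {'return': 0} signals success and
    # a non-zero 'return' with an 'error' string aborts the run.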
+ state = i['state'] + script_path = i['run_script_input']['path'] + + if env.get('CM_MLPERF_SKIP_RUN', '') == "yes": + return {'return': 0} + + if env.get('CM_RUN_DOCKER_CONTAINER', '') == "yes": + return {'return': 0} + + if env.get('CM_MLPERF_POWER', '') == "yes": + power = "yes" + else: + power = "no" + + rerun = True if env.get("CM_RERUN", "") != '' else False + + if 'CM_MLPERF_LOADGEN_SCENARIO' not in env: + env['CM_MLPERF_LOADGEN_SCENARIO'] = "Offline" + + if 'CM_MLPERF_LOADGEN_MODE' not in env: + env['CM_MLPERF_LOADGEN_MODE'] = "accuracy" + + if 'CM_MODEL' not in env: + return { + 'return': 1, 'error': "Please select a variation specifying the model to run"} + + # if env['CM_MODEL'] == "resnet50": + # cmd = "cp " + os.path.join(env['CM_DATASET_AUX_PATH'], "val.txt") + " " + os.path.join(env['CM_DATASET_PATH'], + # "val_map.txt") + # ret = os.system(cmd) + + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] = " " + \ + env.get('CM_MLPERF_LOADGEN_EXTRA_OPTIONS', '') + " " + + if 'CM_MLPERF_LOADGEN_QPS' not in env: + env['CM_MLPERF_LOADGEN_QPS_OPT'] = "" + else: + env['CM_MLPERF_LOADGEN_QPS_OPT'] = " --qps " + \ + env['CM_MLPERF_LOADGEN_QPS'] + + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += env['CM_MLPERF_LOADGEN_QPS_OPT'] + + if 'CM_NUM_THREADS' not in env: + if 'CM_MINIMIZE_THREADS' in env: + env['CM_NUM_THREADS'] = str(int(env['CM_HOST_CPU_TOTAL_CORES']) // + (int(env.get('CM_HOST_CPU_SOCKETS', '1')) * int(env.get('CM_HOST_CPU_TOTAL_CORES', '1')))) + else: + env['CM_NUM_THREADS'] = env.get('CM_HOST_CPU_TOTAL_CORES', '1') + + if env.get('CM_MLPERF_LOADGEN_MAX_BATCHSIZE', '') != '' and not env.get( + 'CM_MLPERF_MODEL_SKIP_BATCHING', False): + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --max-batchsize " + \ + str(env['CM_MLPERF_LOADGEN_MAX_BATCHSIZE']) + + if env.get('CM_MLPERF_LOADGEN_BATCH_SIZE', '') != '': + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --batch-size " + \ + str(env['CM_MLPERF_LOADGEN_BATCH_SIZE']) + + if env.get('CM_MLPERF_LOADGEN_QUERY_COUNT', '') != '' and not env.get( + 'CM_TMP_IGNORE_MLPERF_QUERY_COUNT', False) and env.get('CM_MLPERF_RUN_STYLE', '') != "valid": + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --count " + \ + env['CM_MLPERF_LOADGEN_QUERY_COUNT'] + + print("Using MLCommons Inference source from '" + + env['CM_MLPERF_INFERENCE_SOURCE'] + "'") + + if 'CM_MLPERF_CONF' not in env: + env['CM_MLPERF_CONF'] = os.path.join( + env['CM_MLPERF_INFERENCE_SOURCE'], "mlperf.conf") + + x = "" if os_info['platform'] == 'windows' else "'" + if "llama2-70b" in env['CM_MODEL']: + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --mlperf-conf " + \ + x + env['CM_MLPERF_CONF'] + x + else: + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --mlperf_conf " + \ + x + env['CM_MLPERF_CONF'] + x + + env['MODEL_DIR'] = env.get('CM_ML_MODEL_PATH') + if not env['MODEL_DIR']: + env['MODEL_DIR'] = os.path.dirname( + env.get( + 'CM_MLPERF_CUSTOM_MODEL_PATH', + env.get('CM_ML_MODEL_FILE_WITH_PATH'))) + + RUN_CMD = "" + + scenario = env['CM_MLPERF_LOADGEN_SCENARIO'] + scenario_extra_options = '' + + NUM_THREADS = env['CM_NUM_THREADS'] + if int(NUM_THREADS) > 2 and env['CM_MLPERF_DEVICE'] == "gpu": + NUM_THREADS = "2" # Don't use more than 2 threads when run on GPU + + if env['CM_MODEL'] in ['resnet50', 'retinanet', 'stable-diffusion-xl']: + scenario_extra_options += " --threads " + NUM_THREADS + + ml_model_name = env['CM_MODEL'] + if 'CM_MLPERF_USER_CONF' in env: + user_conf_path = env['CM_MLPERF_USER_CONF'] + x = "" if os_info['platform'] == 'windows' else "'" + scenario_extra_options += " --user_conf " + x 
+ user_conf_path + x + + mode = env['CM_MLPERF_LOADGEN_MODE'] + mode_extra_options = "" + + # Grigori blocked for ABTF to preprocess data set on the fly for now + # we can later move it to a separate script to preprocess data set + +# if 'CM_DATASET_PREPROCESSED_PATH' in env and env['CM_MODEL'] in [ 'resnet50', 'retinanet' ]: +# #dataset_options = " --use_preprocessed_dataset --preprocessed_dir "+env['CM_DATASET_PREPROCESSED_PATH'] +# if env.get('CM_MLPERF_LAST_RELEASE') not in [ "v2.0", "v2.1" ]: +# dataset_options = " --use_preprocessed_dataset --cache_dir "+env['CM_DATASET_PREPROCESSED_PATH'] +# else: +# dataset_options = "" +# if env['CM_MODEL'] == "retinanet": +# dataset_options += " --dataset-list "+ env['CM_DATASET_ANNOTATIONS_FILE_PATH'] +# elif env['CM_MODEL'] == "resnet50": +# dataset_options += " --dataset-list "+ os.path.join(env['CM_DATASET_AUX_PATH'], "val.txt") +# env['DATA_DIR'] = env.get('CM_DATASET_PREPROCESSED_PATH') +# else: +# if 'CM_DATASET_PREPROCESSED_PATH' in env: +# env['DATA_DIR'] = env.get('CM_DATASET_PREPROCESSED_PATH') +# else: +# env['DATA_DIR'] = env.get('CM_DATASET_PATH') +# dataset_options = '' + + # Grigori added for ABTF +# dataset_path = env.get('CM_DATASET_PATH') +# env['DATA_DIR'] = dataset_path + +# dataset_options = " --dataset-list " + env['CM_DATASET_ANNOTATIONS_FILE_PATH'] +# dataset_options += " --cache_dir " + os.path.join(script_path, 'preprocessed-dataset') + + dataset_options = '' + + if env.get('CM_MLPERF_EXTRA_DATASET_ARGS', '') != '': + dataset_options += " " + env['CM_MLPERF_EXTRA_DATASET_ARGS'] + + if mode == "accuracy": + mode_extra_options += " --accuracy" + env['CM_OUTPUT_PREDICTIONS_PATH'] = os.path.join( + env['CM_DATASET_MLCOMMONS_COGNATA_PATH'], + env['CM_DATASET_MLCOMMONS_COGNATA_SERIAL_NUMBERS'], + 'Cognata_Camera_01_8M_png', + 'output') + + elif mode == "performance": + pass + + elif mode == "compliance": + + audit_full_path = env['CM_MLPERF_INFERENCE_AUDIT_PATH'] + mode_extra_options = " --audit '" + audit_full_path + "'" + + if env.get('CM_MLPERF_OUTPUT_DIR', '') == '': + env['CM_MLPERF_OUTPUT_DIR'] = os.getcwd() + + mlperf_implementation = env.get('CM_MLPERF_IMPLEMENTATION', 'reference') + + # Generate CMD + + # Grigori updated for ABTF demo +# cmd, run_dir = get_run_cmd(os_info, env, scenario_extra_options, mode_extra_options, dataset_options, mlperf_implementation) + cmd, run_dir = get_run_cmd_reference( + os_info, env, scenario_extra_options, mode_extra_options, dataset_options, script_path) + + if env.get('CM_NETWORK_LOADGEN', '') == "lon": + + run_cmd = i['state']['mlperf_inference_run_cmd'] + env['CM_SSH_RUN_COMMANDS'] = [] + env['CM_SSH_RUN_COMMANDS'].append( + run_cmd.replace( + "--network=lon", + "--network=sut") + " &") + + env['CM_MLPERF_RUN_CMD'] = cmd + env['CM_RUN_DIR'] = run_dir + env['CM_RUN_CMD'] = cmd + env['CK_PROGRAM_TMP_DIR'] = env.get('CM_ML_MODEL_PATH') # for tvm + + if env.get('CM_HOST_PLATFORM_FLAVOR', '') == "arm64": + env['CM_HOST_PLATFORM_FLAVOR'] = "aarch64" + + if not env.get('CM_COGNATA_ACCURACY_DUMP_FILE'): + env['CM_COGNATA_ACCURACY_DUMP_FILE'] = os.path.join( + env['OUTPUT_DIR'], "accuracy.txt") + + return {'return': 0} + + +def get_run_cmd_reference(os_info, env, scenario_extra_options, + mode_extra_options, dataset_options, script_path=None): + + q = '"' if os_info['platform'] == 'windows' else "'" + + ########################################################################## + # Grigori added for ABTF demo + + if env['CM_MODEL'] in ['retinanet']: + + run_dir = os.path.join(script_path, 
'ref') + + env['RUN_DIR'] = run_dir + + env['OUTPUT_DIR'] = env['CM_MLPERF_OUTPUT_DIR'] + + cognata_dataset_path = env['CM_DATASET_MLCOMMONS_COGNATA_PATH'] +# cognata_dataset_path = env['CM_DATASET_PATH'] # Using open images +# dataset for some tests + + path_to_model = env.get( + 'CM_MLPERF_CUSTOM_MODEL_PATH', + env.get( + 'CM_ML_MODEL_FILE_WITH_PATH', + env.get('CM_ML_MODEL_CODE_WITH_PATH'))) + env['MODEL_FILE'] = path_to_model + + cmd = env['CM_PYTHON_BIN_WITH_PATH'] + " " + os.path.join(run_dir, "python", "main.py") + " --profile " + env['CM_MODEL'] + "-" + env['CM_MLPERF_BACKEND'] + \ + " --model=" + q + path_to_model + q + \ + " --dataset=" + env["CM_MLPERF_VISION_DATASET_OPTION"] + \ + " --dataset-path=" + q + cognata_dataset_path + q + \ + " --cache_dir=" + q + os.path.join(script_path, 'tmp-preprocessed-dataset') + q + \ + " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + " " + \ + " --output " + q + env['OUTPUT_DIR'] + q + " " + \ + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \ + scenario_extra_options + mode_extra_options + dataset_options + + ########################################################################## + + return cmd, run_dir + + +def postprocess(i): + + env = i['env'] + + state = i['state'] + + inp = i['input'] + + return {'return': 0} diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/LICENSE.md b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/LICENSE.md new file mode 100644 index 000000000..f433b1a53 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/LICENSE.md @@ -0,0 +1,177 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/README.md b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/README.md new file mode 100644 index 000000000..b2dcb039f --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/README.md @@ -0,0 +1,2 @@ +Base code was taken from https://github.com/mlcommons/inference/tree/master/vision/classification_and_detection +and modified to prototype support for Cognata data set and ABTF model. 
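For orientation while reading the new files below: `backend.py` defines the minimal contract that every runtime implementation plugs into, and `backend_pytorch_native.py` implements it for PyTorch. Here is a sketch of how a further backend would subclass it, assuming it sits next to the `ref/python` sources added in this PR; the `EchoBackend` name and its trivial behavior are hypothetical illustrations, not part of the change:

```python
import backend  # the abstract Backend class added below


class EchoBackend(backend.Backend):
    """Toy backend that echoes its input; stands in for a real framework."""

    def version(self):
        return "0.0"

    def name(self):
        return "echo"

    def load(self, model_path, inputs=None, outputs=None):
        # the harness keeps whatever load() returns as its model handle
        self.inputs = inputs or ["image"]
        self.outputs = outputs or ["boxes", "labels", "scores"]
        return self

    def predict(self, feed):
        # feed maps input names to batched samples; return the raw
        # outputs that the post-processor will consume
        return [feed[self.inputs[0]]]
```

A real backend would also need a matching branch in `get_backend()` inside `main.py` further down, which resolves the `--backend` string to a class.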
diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/__init__.py b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/backend.py b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/backend.py new file mode 100644 index 000000000..955eddb88 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/backend.py @@ -0,0 +1,23 @@ +""" +abstract backend class +""" + + +# pylint: disable=unused-argument,missing-docstring + +class Backend(): + def __init__(self): + self.inputs = [] + self.outputs = [] + + def version(self): + raise NotImplementedError("Backend:version") + + def name(self): + raise NotImplementedError("Backend:name") + + def load(self, model_path, inputs=None, outputs=None): + raise NotImplementedError("Backend:load") + + def predict(self, feed): + raise NotImplementedError("Backend:predict") diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/backend_pytorch_native.py b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/backend_pytorch_native.py new file mode 100644 index 000000000..ec5401979 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/backend_pytorch_native.py @@ -0,0 +1,95 @@ +""" +PyTorch native backend +Extended by Grigori Fursin for the ABTF demo +""" +# pylint: disable=unused-argument,missing-docstring +import torch  # currently supports pytorch1.0 +import torchvision +import backend + +import os +import sys +import importlib + + +class BackendPytorchNative(backend.Backend): + def __init__(self): + super(BackendPytorchNative, self).__init__() + self.sess = None + self.model = None + self.device = "cuda:0" if torch.cuda.is_available() else "cpu" + + # Grigori added for ABTF model + self.config = None + self.num_classes = None + self.image_size = None + + def version(self): + return torch.__version__ + + def name(self): + return "pytorch-native" + + def image_format(self): + return "NCHW" + + def load(self, model_path, inputs=None, outputs=None): + + # From ABTF code + sys.path.insert(0, os.environ['CM_ML_MODEL_CODE_WITH_PATH']) + + from src.transform import SSDTransformer + from src.utils import generate_dboxes, Encoder, colors, coco_classes + from src.model import SSD, ResNet + + abtf_model_config = os.environ.get('CM_ABTF_ML_MODEL_CONFIG', '') + + num_classes_str = os.environ.get('CM_ABTF_NUM_CLASSES', '').strip() + self.num_classes = int( + num_classes_str) if num_classes_str != '' else 15 + + self.config = importlib.import_module('config.'
+ abtf_model_config) + self.image_size = self.config.model['image_size'] + + self.model = SSD( + self.config.model, + backbone=ResNet( + self.config.model), + num_classes=self.num_classes) + + checkpoint = torch.load( + model_path, + map_location=torch.device( + self.device)) + + self.model.load_state_dict(checkpoint["model_state_dict"]) + + if self.device.startswith('cuda'): + self.model.cuda() + + self.model.eval() + + self.model = self.model.to(self.device) + + self.inputs = inputs + self.outputs = outputs + + return self + + def predict(self, feed): + # For ABTF + + # Always first element for now (later may stack for batching) + img = feed['image'][0] + + if torch.cuda.is_available(): + img = img.cuda() + + inp = img.unsqueeze(dim=0) + + with torch.no_grad(): + ploc, plabel = self.model(inp) + + output = (ploc, plabel) + + return output diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/cognata.py b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/cognata.py new file mode 100644 index 000000000..005fa4e2d --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/cognata.py @@ -0,0 +1,351 @@ +""" +Original code was extended by Grigori Fursin to support cognata data set +""" + +import json +import logging +import os +import time + +import cv2 +from PIL import Image + +import numpy as np +from pycocotools.cocoeval import COCOeval +# import pycoco +import dataset + +logging.basicConfig(level=logging.INFO) +log = logging.getLogger("cognata") + + +class Cognata(dataset.Dataset): + def __init__(self, data_path, image_list, name, use_cache=0, image_size=None, + image_format="NHWC", pre_process=None, count=None, cache_dir=None, preprocessed_dir=None, use_label_map=False, threads=os.cpu_count(), + model_config=None, model_num_classes=None, model_image_size=None):  # For ABTF + super().__init__() + + self.image_size = image_size + self.image_list = [] + self.label_list = [] + self.image_ids = [] + self.image_sizes = [] + self.count = count + self.use_cache = use_cache + self.data_path = data_path + self.pre_process = pre_process + self.use_label_map = use_label_map + + self.model_config = model_config + self.model_num_classes = model_num_classes + self.model_image_size = model_image_size + self.ignore_classes = None + self.files = None + self.dboxes = None + self.transform = None + self.label_map = None + self.label_info = None + self.image_bin = [] + self.encoder = None + self.targets = [] + + ####################################################################### + # From ABTF source + + import torch + from src.utils import generate_dboxes, Encoder + from src.transform import SSDTransformer + from src.dataset import prepare_cognata + import cognata_labels + import csv + import ast + + self.dboxes = generate_dboxes(model_config.model, model="ssd") + self.transform = SSDTransformer( + self.dboxes, self.model_image_size, val=True) + self.encoder = Encoder(self.dboxes) + + folders = model_config.dataset['folders'] + cameras = model_config.dataset['cameras'] + self.ignore_classes = [2, 25, 31] + if 'ignore_classes' in model_config.dataset: + self.ignore_classes = model_config.dataset['ignore_classes'] + + # Grigori added for tests + # Check if overridden by the external environment for tests + x = os.environ.get( + 'CM_DATASET_MLCOMMONS_COGNATA_SERIAL_NUMBERS', + '').strip() + if x != '': + folders = x.split(';') if ';' in x else [x] + + x = os.environ.get(
'CM_DATASET_MLCOMMONS_COGNATA_GROUP_NAMES', + '').strip() + if x != '': + cameras = x.split(';') if ';' in x else [x] + + print('') + print('Cognata folders: {}'.format(str(folders))) + print('Cognata cameras: {}'.format(str(cameras))) + print('') + + # From ABTF source + print('') + print('Scanning Cognata dataset ...') + start = time.time() + files, label_map, label_info = prepare_cognata( + data_path, folders, cameras, self.ignore_classes) + + self.files = files + + print(' Number of files found: {}'.format(len(files))) + print(' Time: {:.2f} sec.'.format(time.time() - start)) + + if os.environ.get( + 'CM_ABTF_ML_MODEL_TRAINING_FORCE_COGNATA_LABELS', '') == 'yes': + label_map = cognata_labels.label_map + label_info = cognata_labels.label_info + + self.label_map = label_map + self.label_info = label_info + + if self.model_num_classes is not None: + self.model_num_classes = len(label_map.keys()) + + print('') + print('Preloading and preprocessing Cognata dataset on the fly ...') + + start = time.time() + + idx = 0 + + for f in self.files: + + image_name = self.files[idx]['img'] + + img = Image.open(image_name).convert('RGB') + + width, height = img.size + boxes = [] + boxes2 = [] + labels = [] + gt_boxes = [] + targets = [] + with open(self.files[idx]['ann']) as f: + reader = csv.reader(f) + rows = list(reader) + header = rows[0] + annotations = rows[1:] + bbox_index = header.index('bounding_box_2D') + class_index = header.index('object_class') + distance_index = header.index('center_distance') + for annotation in annotations: + bbox = annotation[bbox_index] + bbox = ast.literal_eval(bbox) + object_width = bbox[2] - bbox[0] + object_height = bbox[3] - bbox[1] + object_area = object_width * object_height + label = ast.literal_eval(annotation[class_index]) + distance = ast.literal_eval(annotation[distance_index]) + if object_area < 50 or int( + label) in self.ignore_classes or object_height < 8 or object_width < 8 or distance > 300: + continue + label = self.label_map[label] + boxes.append([bbox[0] / width, bbox[1] / height, + bbox[2] / width, bbox[3] / height]) + boxes2.append([bbox[0], bbox[1], bbox[2], bbox[3]]) + gt_boxes.append( + [bbox[0], bbox[1], bbox[2], bbox[3], label, 0, 0]) + labels.append(label) + + boxes = torch.tensor(boxes) + boxes2 = torch.tensor(boxes2) + labels = torch.tensor(labels) + gt_boxes = torch.tensor(gt_boxes) + + targets.append({'boxes': boxes2.to(device='cpu'), + 'labels': labels.to(device='cpu', + dtype=torch.int32)}) + + img, (height, width), boxes, labels = self.transform( + img, (height, width), boxes, labels, max_num=500) + + _, height, width = img.shape + + self.image_bin.append(img) + self.image_ids.append(idx) + self.image_list.append(image_name) + self.image_sizes.append((height, width)) + + self.label_list.append((labels, boxes)) + + self.targets.append(targets) + + # limit the dataset if requested + idx += 1 + if self.count is not None and idx >= self.count: + break + + print(' Time: {:.2f} sec.'.format(time.time() - start)) + print('') + + return + + def get_item(self, nr): + """Get image by number in the list.""" + + return self.image_bin[nr], self.label_list[nr] + + def get_item_loc(self, nr): + + return self.files[nr]['img'] + + # Grigori added here to be able to return Torch tensor and not Numpy + + def get_samples(self, id_list): + + data = [self.image_list_inmemory[idx] for idx in id_list] + labels = [self.label_list[idx] for idx in id_list] + + return data, labels + + +class PostProcessCognata: + """ + Post processing for tensorflow 
ssd-mobilenet style models + """ + + def __init__(self): + self.results = [] + self.good = 0 + self.total = 0 + self.content_ids = [] + self.use_inv_map = False + + def add_results(self, results): + self.results.extend(results) + + def __call__(self, results, ids, expected=None, result_dict=None, ): + + # Dummy + processed_results = [] + return processed_results + + def start(self): + self.results = [] + self.good = 0 + self.total = 0 + + def finalize(self, result_dict, ds=None, output_dir=None): + + # To be improved + + from torchmetrics.detection.mean_ap import MeanAveragePrecision + metric = MeanAveragePrecision( + iou_type="bbox", + class_metrics=True, + backend='faster_coco_eval') + + result_dict["good"] += self.good + result_dict["total"] += self.total + + preds = [] + targets = [] + # For now batch_size = 1 + for idx in range(0, len(self.results)): + preds.append(self.results[idx][0]) + id = self.results[idx][0]['id'] + targets.append(ds.targets[id][0]) + metric.update(preds, targets) + + metrics = metric.compute() + + print('=================================================') + import pprint + pp = pprint.PrettyPrinter(indent=4) + pp.pprint(metrics) + print('=================================================') + + classes = metrics['classes'].tolist() + map_per_classes = metrics['map_per_class'].tolist() + + final_map = {} + for c in range(0, len(classes)): + final_map[ds.label_info[classes[c]]] = float(map_per_classes[c]) + + result_dict["mAP"] = float(metrics['map']) + result_dict["mAP_classes"] = final_map + + +class PostProcessCognataPt(PostProcessCognata): + """ + Post processing required by ssd-resnet34 / pytorch + """ + + def __init__(self, nms_threshold, max_output, + score_threshold, height, width): + super().__init__() + self.nms_threshold = nms_threshold + self.max_output = max_output + self.score_threshold = score_threshold + self.height = height + self.width = width + + def __call__(self, results, ids, expected=None, result_dict=None): + # results come as: + # detection_boxes,detection_classes,detection_scores + + import torch + + processed_results = [] + + # For now 1 result (batch 1) - need to add support for batch size > 1 + # later + ploc = results[0] + plabel = results[1] + + # Get predictions (from cognata_eval) +# ploc, plabel = model(img) + ploc, plabel = ploc.float(), plabel.float() + + preds = [] + + for i in range(ploc.shape[0]): + dts = [] + labels = [] + scores = [] + + ploc_i = ploc[i, :, :].unsqueeze(0) + plabel_i = plabel[i, :, :].unsqueeze(0) + + result = self.encoder.decode_batch( + ploc_i, plabel_i, self.nms_threshold, self.max_output)[0] + + loc, label, prob = [r.cpu().numpy() for r in result] + for loc_, label_, prob_ in zip(loc, label, prob): + if label_ in expected[i][0]: + self.good += 1 + self.total += 1 + dts.append([loc_[0] * + self.width, loc_[1] * + self.height, loc_[2] * + self.width, loc_[3] * + self.height,]) + labels.append(label_) + scores.append(prob_) + + dts = torch.tensor(dts, device='cpu') + labels = torch.tensor(labels, device='cpu', dtype=torch.int32) + scores = torch.tensor(scores, device='cpu') + preds.append({'boxes': dts, 'labels': labels, + 'scores': scores, 'id': ids[i]}) + + # Only batch size supported + idx = 0 + + processed_results.append(preds) + + # self.total += 1 + + return processed_results diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/cognata_labels.py b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/cognata_labels.py new file mode 
100644 index 000000000..b77f09b0e --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/cognata_labels.py @@ -0,0 +1,49 @@ +label_map = { + 0: 0, + 28: 1, + 29: 2, + 33: 3, + 8: 4, + 36: 5, + 13: 6, + 11: 7, + 35: 8, + 14: 9, + 9: 10, + 48: 11, + 10: 12, + 46: 13, + 44: 14} +label_info = {0: 'background', 1: 'Traffic_light', 2: 'Props', 3: 'TrafficSign', + 4: 'Car', 5: 'Van', 6: 'Rider', 7: 'Motorcycle', 8: 'Bicycle', + 9: 'Pedestrian', 10: 'Truck', 11: 'PersonalMobility', 12: 'Bus', 13: 'Trailer', 14: 'Animal'} + +colors = [None, (39, 129, 113), (164, 80, 133), (83, 122, 114), (99, 81, 172), (95, 56, 104), (37, 84, 86), + (14, 89, 122), + (80, 7, 65), (10, 102, 25), (90, 185, 109), (106, 110, + 132), (169, 158, 85), (188, 185, 26), (103, 1, 17), + (82, 144, 81), (92, 7, 184), (49, 81, 155), (179, + 177, 69), (93, 187, 158), (13, 39, 73), (12, 50, 60), + (16, 179, 33), (112, 69, 165), (15, 139, 63), (33, 191, + 159), (182, 173, 32), (34, 113, 133), (90, 135, 34), + (53, 34, 86), (141, 35, 190), (6, 171, 8), (118, 76, + 112), (89, 60, 55), (15, 54, 88), (112, 75, 181), + (42, 147, 38), (138, 52, 63), (128, 65, 149), (106, + 103, 24), (168, 33, 45), (28, 136, 135), (86, 91, 108), + (52, 11, 76), (142, 6, 189), (57, 81, 168), (55, 19, + 148), (182, 101, 89), (44, 65, 179), (1, 33, 26), + (122, 164, 26), (70, 63, 134), (137, 106, 82), (120, + 118, 52), (129, 74, 42), (182, 147, 112), (22, 157, 50), + (56, 50, 20), (2, 22, 177), (156, 100, 106), (21, + 35, 42), (13, 8, 121), (142, 92, 28), (45, 118, 33), + (105, 118, 30), (7, 185, 124), (46, 34, 146), (105, + 184, 169), (22, 18, 5), (147, 71, 73), (181, 64, 91), + (31, 39, 184), (164, 179, 33), (96, 50, 18), (95, 15, + 106), (113, 68, 54), (136, 116, 112), (119, 139, 130), + (31, 139, 34), (66, 6, 127), (62, 39, 2), (49, 99, + 180), (49, 119, 155), (153, 50, 183), (125, 38, 3), + (129, 87, 143), (49, 87, 40), (128, 62, 120), (73, 85, + 148), (28, 144, 118), (29, 9, 24), (175, 45, 108), + (81, 175, 64), (178, 19, 157), (74, 188, 190), (18, + 114, 2), (62, 128, 96), (21, 3, 150), (0, 6, 95), + (2, 20, 184), (122, 37, 185)] diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/dataset.py b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/dataset.py new file mode 100644 index 000000000..9b8af84f5 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/dataset.py @@ -0,0 +1,303 @@ +""" +dataset related classes and methods +""" + +# pylint: disable=unused-argument,missing-docstring + +import logging +import sys +import time + +import cv2 +import numpy as np + + +logging.basicConfig(level=logging.INFO) +log = logging.getLogger("dataset") + + +class Item(): + def __init__(self, label, img, idx): + self.label = label + self.img = img + self.idx = idx + self.start = time.time() + + +def usleep(sec): + if sys.platform == 'win32': + # on windows time.sleep() doesn't work to well + import ctypes + kernel32 = ctypes.windll.kernel32 + timer = kernel32.CreateWaitableTimerA( + ctypes.c_void_p(), True, ctypes.c_void_p()) + delay = ctypes.c_longlong(int(-1 * (10 * 1000000 * sec))) + kernel32.SetWaitableTimer( + timer, + ctypes.byref(delay), + 0, + ctypes.c_void_p(), + ctypes.c_void_p(), + False) + kernel32.WaitForSingleObject(timer, 0xffffffff) + else: + time.sleep(sec) + + +class Dataset(): + def __init__(self): + self.arrival = None + self.image_list = [] + self.label_list = [] + 
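+ # sample index -> preprocessed image, populated by load_query_samples() below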
self.image_list_inmemory = {} + self.last_loaded = -1 + + def preprocess(self, use_cache=True): + raise NotImplementedError("Dataset:preprocess") + + def get_item_count(self): + return len(self.image_list) + + def get_list(self): + raise NotImplementedError("Dataset:get_list") + + def load_query_samples(self, sample_list): + self.image_list_inmemory = {} + for sample in sample_list: + self.image_list_inmemory[sample], _ = self.get_item(sample) + self.last_loaded = time.time() + + def unload_query_samples(self, sample_list): + if sample_list: + for sample in sample_list: + if sample in self.image_list_inmemory: + del self.image_list_inmemory[sample] + else: + self.image_list_inmemory = {} + + def get_samples(self, id_list): + data = np.array([self.image_list_inmemory[id] for id in id_list]) + return data, self.label_list[id_list] + + def get_item_loc(self, id): + raise NotImplementedError("Dataset:get_item_loc") + + +# +# Post processing +# +class PostProcessCommon: + def __init__(self, offset=0): + self.offset = offset + self.good = 0 + self.total = 0 + + def __call__(self, results, ids, expected=None, result_dict=None): + processed_results = [] + n = len(results[0]) + for idx in range(0, n): + result = results[0][idx] + self.offset + processed_results.append([result]) + if result == expected[idx]: + self.good += 1 + self.total += n + return processed_results + + def add_results(self, results): + pass + + def start(self): + self.good = 0 + self.total = 0 + + def finalize(self, results, ds=False, output_dir=None): + results["good"] = self.good + results["total"] = self.total + + +class PostProcessArgMax: + def __init__(self, offset=0): + self.offset = offset + self.good = 0 + self.total = 0 + + def __call__(self, results, ids, expected=None, result_dict=None): + processed_results = [] + results = np.argmax(results[0], axis=1) + n = results.shape[0] + for idx in range(0, n): + result = results[idx] + self.offset + processed_results.append([result]) + if result == expected[idx]: + self.good += 1 + self.total += n + return processed_results + + def add_results(self, results): + pass + + def start(self): + self.good = 0 + self.total = 0 + + def finalize(self, results, ds=False, output_dir=None): + results["good"] = self.good + results["total"] = self.total + + +# +# pre-processing +# + +def center_crop(img, out_height, out_width): + height, width, _ = img.shape + left = int((width - out_width) / 2) + right = int((width + out_width) / 2) + top = int((height - out_height) / 2) + bottom = int((height + out_height) / 2) + img = img[top:bottom, left:right] + return img + + +def resize_with_aspectratio( + img, out_height, out_width, scale=87.5, inter_pol=cv2.INTER_LINEAR): + height, width, _ = img.shape + new_height = int(100. * out_height / scale) + new_width = int(100. 
* out_width / scale) + if height > width: + w = new_width + h = int(new_height * height / width) + else: + h = new_height + w = int(new_width * width / height) + img = cv2.resize(img, (w, h), interpolation=inter_pol) + return img + + +def pre_process_vgg(img, dims=None, need_transpose=False): + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + output_height, output_width, _ = dims + cv2_interpol = cv2.INTER_AREA + img = resize_with_aspectratio( + img, + output_height, + output_width, + inter_pol=cv2_interpol) + img = center_crop(img, output_height, output_width) + img = np.asarray(img, dtype='float32') + + # normalize image + means = np.array([123.68, 116.78, 103.94], dtype=np.float32) + img -= means + + # transpose if needed + if need_transpose: + img = img.transpose([2, 0, 1]) + return img + + +def pre_process_mobilenet(img, dims=None, need_transpose=False): + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + output_height, output_width, _ = dims + img = resize_with_aspectratio( + img, + output_height, + output_width, + inter_pol=cv2.INTER_LINEAR) + img = center_crop(img, output_height, output_width) + img = np.asarray(img, dtype='float32') + + img /= 255.0 + img -= 0.5 + img *= 2 + + # transpose if needed + if need_transpose: + img = img.transpose([2, 0, 1]) + return img + + +def pre_process_imagenet_pytorch(img, dims=None, need_transpose=False): + from PIL import Image + import torchvision.transforms.functional as F + + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img = Image.fromarray(img) + img = F.resize(img, 256, Image.BILINEAR) + img = F.center_crop(img, 224) + img = F.to_tensor(img) + img = F.normalize( + img, mean=[ + 0.485, 0.456, 0.406], std=[ + 0.229, 0.224, 0.225], inplace=False) + if not need_transpose: + img = img.permute(1, 2, 0) # NHWC + img = np.asarray(img, dtype='float32') + return img + + +def maybe_resize(img, dims): + img = np.array(img, dtype=np.float32) + if len(img.shape) < 3 or img.shape[2] != 3: + # some images might be grayscale + img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + if dims is not None: + im_height, im_width, _ = dims + img = cv2.resize(img, (im_width, im_height), + interpolation=cv2.INTER_LINEAR) + return img + + +def pre_process_coco_mobilenet(img, dims=None, need_transpose=False): + img = maybe_resize(img, dims) + img = np.asarray(img, dtype=np.uint8) + # transpose if needed + if need_transpose: + img = img.transpose([2, 0, 1]) + return img + + +def pre_process_coco_pt_mobilenet(img, dims=None, need_transpose=False): + img = maybe_resize(img, dims) + img -= 127.5 + img /= 127.5 + # transpose if needed + if need_transpose: + img = img.transpose([2, 0, 1]) + return img + + +def pre_process_coco_resnet34(img, dims=None, need_transpose=False): + img = maybe_resize(img, dims) + mean = np.array([0.485, 0.456, 0.406], dtype=np.float32) + std = np.array([0.229, 0.224, 0.225], dtype=np.float32) + + img = img / 255. - mean + img = img / std + + if need_transpose: + img = img.transpose([2, 0, 1]) + + return img + + +def pre_process_coco_resnet34_tf(img, dims=None, need_transpose=False): + img = maybe_resize(img, dims) + mean = np.array([123.68, 116.78, 103.94], dtype=np.float32) + img = img - mean + if need_transpose: + img = img.transpose([2, 0, 1]) + + return img + + +def pre_process_openimages_retinanet(img, dims=None, need_transpose=False): + img = maybe_resize(img, dims) + img /= 255. 
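+ # pixel values are now scaled to the [0, 1] range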
+ # transpose if needed + if need_transpose: + img = img.transpose([2, 0, 1]) + return img diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/main.py b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/main.py new file mode 100644 index 000000000..e4462da8c --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/main.py @@ -0,0 +1,659 @@ +""" +mlperf inference benchmarking tool +""" + +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import argparse +import array +import collections +import json +import logging +import os +import sys +import threading +import time +from queue import Queue +from PIL import Image +import mlperf_loadgen as lg +import numpy as np +import cv2 +import glob +import dataset +import cognata +import cognata_labels + +# import imagenet +# import coco +# import openimages + +logging.basicConfig(level=logging.INFO) +log = logging.getLogger("main") + +NANO_SEC = 1e9 +MILLI_SEC = 1000 + +# pylint: disable=missing-docstring + +# the datasets we support +SUPPORTED_DATASETS = { + "cognata-4mp-pt": + (cognata.Cognata, None, cognata.PostProcessCognataPt(0.5, 200, 0.05, 1440, 2560), + {"image_size": [1440, 2560, 3]}), + "cognata-8mp-pt": + (cognata.Cognata, None, cognata.PostProcessCognataPt(0.5, 200, 0.05, 2160, 3840), + {"image_size": [2160, 3840, 3]}) +} + +# pre-defined command line options to simplify things. They are used as defaults and can be +# overwritten from the command line + +SUPPORTED_PROFILES = { + "defaults": { + "dataset": "imagenet", + "backend": "tensorflow", + "cache": 0, + "max-batchsize": 32, + }, + + # retinanet + "retinanet-pytorch": { + "inputs": "image", + "outputs": "boxes,labels,scores", + "dataset": "openimages-800-retinanet", + "backend": "pytorch-native", + "model-name": "retinanet", + }, +} + +SCENARIO_MAP = { + "SingleStream": lg.TestScenario.SingleStream, + "MultiStream": lg.TestScenario.MultiStream, + "Server": lg.TestScenario.Server, + "Offline": lg.TestScenario.Offline, +} + +last_timeing = [] + + +def get_args(): + """Parse commandline.""" + parser = argparse.ArgumentParser() + parser.add_argument( + "--dataset", + choices=SUPPORTED_DATASETS.keys(), + help="dataset") + parser.add_argument( + "--dataset-path", + required=True, + help="path to the dataset") + parser.add_argument("--dataset-list", help="path to the dataset list") + parser.add_argument( + "--data-format", + choices=[ + "NCHW", + "NHWC"], + help="data format") + parser.add_argument( + "--profile", + choices=SUPPORTED_PROFILES.keys(), + help="standard profiles") + parser.add_argument("--scenario", default="SingleStream", + help="mlperf benchmark scenario, one of " + str(list(SCENARIO_MAP.keys()))) + parser.add_argument( + "--max-batchsize", + type=int, + help="max batch size in a single inference") + parser.add_argument("--model", required=True, help="model file") + parser.add_argument("--output", default="output", help="test results") + parser.add_argument("--inputs", help="model inputs") + parser.add_argument("--outputs", help="model outputs") + parser.add_argument("--backend", help="runtime to use") + parser.add_argument( + "--model-name", + help="name of the mlperf model, i.e.
resnet50") + parser.add_argument( + "--threads", + default=os.cpu_count(), + type=int, + help="threads") + parser.add_argument("--qps", type=int, help="target qps") + parser.add_argument("--cache", type=int, default=0, help="use cache") + parser.add_argument( + "--cache_dir", + type=str, + default=None, + help="dir path for caching") + parser.add_argument( + "--preprocessed_dir", + type=str, + default=None, + help="dir path for storing preprocessed images (overrides cache_dir)") + parser.add_argument( + "--use_preprocessed_dataset", + action="store_true", + help="use preprocessed dataset instead of the original") + parser.add_argument( + "--accuracy", + action="store_true", + help="enable accuracy pass") + parser.add_argument( + "--find-peak-performance", + action="store_true", + help="enable finding peak performance pass") + parser.add_argument( + "--debug", + action="store_true", + help="debug, turn traces on") + + # file to use mlperf rules compliant parameters + parser.add_argument( + "--mlperf_conf", + default="../../mlperf.conf", + help="mlperf rules config") + # file for user LoadGen settings such as target QPS + parser.add_argument( + "--user_conf", + default="user.conf", + help="user config for user LoadGen settings such as target QPS") + # file for LoadGen audit settings + parser.add_argument( + "--audit_conf", + default="audit.config", + help="config for LoadGen audit settings") + + # below will override mlperf rules compliant settings - don't use for + # official submission + parser.add_argument("--time", type=int, help="time to scan in seconds") + parser.add_argument("--count", type=int, help="dataset items to use") + parser.add_argument( + "--performance-sample-count", + type=int, + help="performance sample count") + parser.add_argument( + "--max-latency", + type=float, + help="mlperf max latency in pct tile") + parser.add_argument( + "--samples-per-query", + default=8, + type=int, + help="mlperf multi-stream samples per query") + args = parser.parse_args() + + # don't use defaults in argparser. 
Instead we default to a dict, override that with a profile + # and take this as default unless the command line gives one + defaults = SUPPORTED_PROFILES["defaults"] + + if args.profile: + profile = SUPPORTED_PROFILES[args.profile] + defaults.update(profile) + for k, v in defaults.items(): + kc = k.replace("-", "_") + if getattr(args, kc) is None: + setattr(args, kc, v) + if args.inputs: + args.inputs = args.inputs.split(",") + if args.outputs: + args.outputs = args.outputs.split(",") + + if args.scenario not in SCENARIO_MAP: + parser.error("valid scenarios:" + str(list(SCENARIO_MAP.keys()))) + return args + + +def get_backend(backend): + if backend == "null": + from backend_null import BackendNull + backend = BackendNull() + elif backend == "pytorch": + from backend_pytorch import BackendPytorch + backend = BackendPytorch() + elif backend == "pytorch-native": + from backend_pytorch_native import BackendPytorchNative + backend = BackendPytorchNative() + else: + raise ValueError("unknown backend: " + backend) + return backend + + +class Item: + """An item that we queue for processing by the thread pool.""" + + def __init__(self, query_id, content_id, img, label=None): + self.query_id = query_id + self.content_id = content_id + self.img = img + self.label = label + self.start = time.time() + + +class RunnerBase: + def __init__(self, model, ds, threads, post_proc=None, max_batchsize=128): + self.take_accuracy = False + self.ds = ds + self.model = model + self.post_process = post_proc + self.threads = threads + self.take_accuracy = False + self.max_batchsize = max_batchsize + self.result_timing = [] + self.proc_results = [] + + def handle_tasks(self, tasks_queue): + pass + + def start_run(self, result_dict, take_accuracy): + self.result_dict = result_dict + self.result_timing = [] + self.take_accuracy = take_accuracy + self.post_process.start() + + def run_one_item(self, qitem): + # run the prediction + processed_results = [] + try: + results = self.model.predict({self.model.inputs[0]: qitem.img}) + + processed_results = self.post_process( + results, qitem.content_id, qitem.label, self.result_dict) + if self.take_accuracy: + self.post_process.add_results(processed_results) + + self.result_timing.append(time.time() - qitem.start) + + except Exception as ex:  # pylint: disable=broad-except + src = [self.ds.get_item_loc(i) for i in qitem.content_id] + log.error("thread: failed on contentid=%s, %s", src, ex) + # since post_process will not run, fake empty responses + processed_results = [[]] * len(qitem.query_id) + finally: + response_array_refs = [] + response = [] + for idx, query_id in enumerate(qitem.query_id): + + # Temporary hack for Cognata to add only boxes - to be fixed + processed_results2 = [x['boxes'].numpy() + for x in processed_results[idx]] + self.proc_results.append([{'boxes': x['boxes'].tolist(), 'scores': x['scores'].tolist(), 'labels': x['labels'].tolist(), 'id': x['id']} + for x in processed_results[idx]]) + response_array = array.array("B", np.array( + processed_results2, np.float32).tobytes()) + response_array_refs.append(response_array) + bi = response_array.buffer_info() + response.append(lg.QuerySampleResponse(query_id, bi[0], bi[1])) + lg.QuerySamplesComplete(response) + + def enqueue(self, query_samples): + idx = [q.index for q in query_samples] + query_id = [q.id for q in query_samples] + if len(query_samples) < self.max_batchsize: + data, label = self.ds.get_samples(idx) + self.run_one_item(Item(query_id, idx, data, label)) + else: + bs = self.max_batchsize + for i in range(0, len(idx),
bs): + data, label = self.ds.get_samples(idx[i:i + bs]) + self.run_one_item( + Item(query_id[i:i + bs], idx[i:i + bs], data, label)) + + def finish(self): + pass + + +class QueueRunner(RunnerBase): + def __init__(self, model, ds, threads, post_proc=None, max_batchsize=128): + super().__init__(model, ds, threads, post_proc, max_batchsize) + self.tasks = Queue(maxsize=threads * 4) + self.workers = [] + self.result_dict = {} + + for _ in range(self.threads): + worker = threading.Thread( + target=self.handle_tasks, args=( + self.tasks,)) + worker.daemon = True + self.workers.append(worker) + worker.start() + + def handle_tasks(self, tasks_queue): + """Worker thread.""" + while True: + qitem = tasks_queue.get() + if qitem is None: + # None in the queue indicates the parent want us to exit + tasks_queue.task_done() + break + self.run_one_item(qitem) + tasks_queue.task_done() + + def enqueue(self, query_samples): + idx = [q.index for q in query_samples] + query_id = [q.id for q in query_samples] + + if len(query_samples) < self.max_batchsize: + data, label = self.ds.get_samples(idx) + self.tasks.put(Item(query_id, idx, data, label)) + else: + bs = self.max_batchsize + for i in range(0, len(idx), bs): + ie = i + bs + + data, label = self.ds.get_samples(idx[i:ie]) + self.tasks.put(Item(query_id[i:ie], idx[i:ie], data, label)) + + def finish(self): + # exit all threads + for _ in self.workers: + self.tasks.put(None) + for worker in self.workers: + worker.join() + + +def add_results(final_results, name, result_dict, + result_list, took, show_accuracy=False): + percentiles = [50., 80., 90., 95., 99., 99.9] + buckets = np.percentile(result_list, percentiles).tolist() + buckets_str = ",".join(["{}:{:.4f}".format(p, b) + for p, b in zip(percentiles, buckets)]) + + if result_dict["total"] == 0: + result_dict["total"] = len(result_list) + # this is what we record for each run + result = { + "took": took, + "mean": np.mean(result_list), + "percentiles": {str(k): v for k, v in zip(percentiles, buckets)}, + "qps": len(result_list) / took, + "count": len(result_list), + "good_items": result_dict["good"], + "total_items": result_dict["total"], + } + acc_str = "" + if show_accuracy: + result["accuracy"] = 100. * result_dict["good"] / result_dict["total"] + acc_str = ", acc={:.3f}%".format(result["accuracy"]) + if "mAP" in result_dict: + result["mAP"] = 100. 
* result_dict["mAP"] + acc_str += ", mAP={:.3f}%".format(result["mAP"]) + if os.environ.get('CM_COGNATA_ACCURACY_DUMP_FILE', '') != '': + accuracy_file = os.environ['CM_COGNATA_ACCURACY_DUMP_FILE'] + with open(accuracy_file, "w") as f: + f.write("{:.3f}%".format(result["mAP"])) + + if "mAP_classes" in result_dict: + result['mAP_per_classes'] = result_dict["mAP_classes"] + acc_str += ", mAP_classes={}".format(result_dict["mAP_classes"]) + + # add the result to the result dict + final_results[name] = result + + # to stdout + print("{} qps={:.2f}, mean={:.4f}, time={:.3f}{}, queries={}, tiles={}".format( + name, result["qps"], result["mean"], took, acc_str, + len(result_list), buckets_str)) + + print('======================================================================') + +######################################################################### + + +def main(): + print('======================================================================') + + global last_timeing + args = get_args() + + log.info(args) + + # Find backend + backend = get_backend(args.backend) + + # Load model to backend (Grigori moved here before dataset + # since we get various info about pre-processing from the model) + + print('') + print('Loading model ...') + print('') + + model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs) + +# print (model.num_classes) +# print (model.image_size) + + # --count applies to accuracy mode only and can be used to limit the number of images + # for testing. + count_override = False + count = args.count + if count: + count_override = True + + # dataset to use + wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[args.dataset] +# if args.use_preprocessed_dataset: +# pre_proc=None + + print('') + print('Loading dataset and preprocessing if needed ...') + print('* Dataset path: {}'.format(args.dataset_path)) + print('* Preprocessed cache path: {}'.format(args.cache_dir)) + print('') + + ds = wanted_dataset(data_path=args.dataset_path, + image_list=args.dataset_list, + name=args.dataset, + pre_process=pre_proc, + use_cache=args.cache, + count=count, + cache_dir=args.cache_dir, + preprocessed_dir=args.preprocessed_dir, + threads=args.threads, + model_config=model.config, # For ABTF + model_num_classes=model.num_classes, # For ABTF + model_image_size=model.image_size, # For ABTF + **kwargs) + + # For ABTF - maybe find cleaner way + post_proc.encoder = ds.encoder + + final_results = { + "runtime": model.name(), + "version": model.version(), + "time": int(time.time()), + "args": vars(args), + "cmdline": str(args), + } + + mlperf_conf = os.path.abspath(args.mlperf_conf) + if not os.path.exists(mlperf_conf): + log.error("{} not found".format(mlperf_conf)) + sys.exit(1) + + user_conf = os.path.abspath(args.user_conf) + if not os.path.exists(user_conf): + log.error("{} not found".format(user_conf)) + sys.exit(1) + + audit_config = os.path.abspath(args.audit_conf) + + if args.output: + output_dir = os.path.abspath(args.output) + os.makedirs(output_dir, exist_ok=True) + os.chdir(output_dir) + + # + # make one pass over the dataset to validate accuracy + # + count = ds.get_item_count() + + # warmup + if os.environ.get('CM_ABTF_ML_MODEL_SKIP_WARMUP', + '').strip().lower() != 'yes': + ds.load_query_samples([0]) + for _ in range(5): + img, _ = ds.get_samples([0]) + _ = backend.predict({backend.inputs[0]: img}) + ds.unload_query_samples(None) + + scenario = SCENARIO_MAP[args.scenario] + runner_map = { + lg.TestScenario.SingleStream: RunnerBase, + 
lg.TestScenario.MultiStream: QueueRunner, + lg.TestScenario.Server: QueueRunner, + lg.TestScenario.Offline: QueueRunner + } + + runner = runner_map[scenario]( + model, + ds, + args.threads, + post_proc=post_proc, + max_batchsize=args.max_batchsize) + + def issue_queries(query_samples): + runner.enqueue(query_samples) + + def flush_queries(): + pass + + log_output_settings = lg.LogOutputSettings() + log_output_settings.outdir = output_dir + log_output_settings.copy_summary_to_stdout = False + log_settings = lg.LogSettings() + log_settings.enable_trace = args.debug + log_settings.log_output = log_output_settings + + settings = lg.TestSettings() + settings.FromConfig(mlperf_conf, args.model_name, args.scenario) + settings.FromConfig(user_conf, args.model_name, args.scenario) + settings.scenario = scenario + settings.mode = lg.TestMode.PerformanceOnly + if args.accuracy: + settings.mode = lg.TestMode.AccuracyOnly + if args.find_peak_performance: + settings.mode = lg.TestMode.FindPeakPerformance + + if args.time: + # override the time we want to run + settings.min_duration_ms = args.time * MILLI_SEC + settings.max_duration_ms = args.time * MILLI_SEC + + if args.qps: + qps = float(args.qps) + settings.server_target_qps = qps + settings.offline_expected_qps = qps + + if count_override: + settings.min_query_count = count + settings.max_query_count = count + + if args.samples_per_query: + settings.multi_stream_samples_per_query = args.samples_per_query + + if args.max_latency: + settings.server_target_latency_ns = int(args.max_latency * NANO_SEC) + settings.multi_stream_expected_latency_ns = int( + args.max_latency * NANO_SEC) + + performance_sample_count = args.performance_sample_count if args.performance_sample_count else min( + count, 500) + sut = lg.ConstructSUT(issue_queries, flush_queries) + qsl = lg.ConstructQSL( + count, + performance_sample_count, + ds.load_query_samples, + ds.unload_query_samples) + + log.info("starting {}".format(scenario)) + result_dict = {"good": 0, "total": 0, "scenario": str(scenario)} + runner.start_run(result_dict, args.accuracy) + + lg.StartTestWithLogSettings(sut, qsl, settings, log_settings, audit_config) + + if not last_timeing: + last_timeing = runner.result_timing + if args.accuracy: + post_proc.finalize(result_dict, ds, output_dir=args.output) + + add_results(final_results, "{}".format(scenario), + result_dict, last_timeing, time.time() - ds.last_loaded, args.accuracy) + + runner.finish() + lg.DestroyQSL(qsl) + lg.DestroySUT(sut) + # + # write final results + # + if args.output: + with open("results.json", "w") as f: + json.dump(final_results, f, sort_keys=True, indent=4) + if args.accuracy: + print('Saving model output examples ...') + files = glob.glob( + os.path.join( + args.dataset_path, + '10002_Urban_Clear_Morning', + 'Cognata_Camera_01_8M_png', + '*.png')) + files = sorted(files) + for pred_batch in runner.proc_results: + for pred in pred_batch: + f = files[pred['id']] + cls_threshold = 0.3 + img = Image.open(f).convert("RGB") + loc, label, prob = np.array( + pred['boxes']), np.array( + pred['labels']), np.array( + pred['scores']) + best = np.argwhere(prob > cls_threshold).squeeze(axis=1) + + loc = loc[best] + label = label[best] + prob = prob[best] + + # Update input image with boxes and predictions + output_img = cv2.imread(f) + if len(loc) > 0: + + loc = loc.astype(np.int32) + + for box, lb, pr in zip(loc, label, prob): + category = cognata_labels.label_info[lb] + color = cognata_labels.colors[lb] + + xmin, ymin, xmax, ymax = box + + cv2.rectangle( 
+ output_img, (xmin, ymin), (xmax, ymax), color, 2) + + text_size = cv2.getTextSize( + category + " : %.2f" % + pr, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0] + + cv2.rectangle( + output_img, (xmin, ymin), (xmin + text_size[0] + 3, ymin + text_size[1] + 4), color, -1) + + cv2.putText( + output_img, category + " : %.2f" % pr, + (xmin, ymin + + text_size[1] + + 4), cv2.FONT_HERSHEY_PLAIN, 1, + (255, 255, 255), 1) + output = "{}_prediction.jpg".format(f[:-4]) + + d1 = os.path.join(os.path.dirname(output), 'output') + if not os.path.isdir(d1): + os.makedirs(d1) + + d2 = os.path.basename(output) + + output = os.path.join(d1, d2) + cv2.imwrite(output, output_img) + with open("preds.json", "w") as f: + json.dump(runner.proc_results, f, indent=4) + + +if __name__ == "__main__": + main() diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/user.conf b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/user.conf new file mode 100644 index 000000000..edffe6912 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/user.conf @@ -0,0 +1,6 @@ +# Please set these fields depending on the performance of your system to +# override default LoadGen settings. +*.SingleStream.target_latency = 10 +*.MultiStream.target_latency = 80 +*.Server.target_qps = 1.0 +*.Offline.target_qps = 1.0 diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive/COPYRIGHT.md b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive/COPYRIGHT.md new file mode 100644 index 000000000..a059b0c49 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive/COPYRIGHT.md @@ -0,0 +1,9 @@ +# Copyright Notice + +© 2024-2025 MLCommons. All Rights Reserved. + +This file is licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. A copy of the License can be obtained at: + +[Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) + +Unless required by applicable law or agreed to in writing, software distributed under the License is provided on an "AS IS" basis, without warranties or conditions of any kind, either express or implied. Please refer to the License for the specific language governing permissions and limitations under the License. 
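Note on the user.conf added above: it only sets LoadGen targets; at run time the reference harness (main.py shown earlier) loads the benchmark defaults from mlperf.conf first, then this user.conf, and finally applies command-line overrides such as --qps and --time. A minimal sketch of that ordering, assuming the standard mlperf_loadgen Python bindings; the model name "retinanet" and the literal values are illustrative only:

import mlperf_loadgen as lg

settings = lg.TestSettings()
# Benchmark defaults first, then per-user overrides such as
# "*.Offline.target_qps = 1.0" from the user.conf above.
settings.FromConfig("mlperf.conf", "retinanet", "Offline")
settings.FromConfig("user.conf", "retinanet", "Offline")
settings.scenario = lg.TestScenario.Offline
settings.mode = lg.TestMode.PerformanceOnly
# Command-line flags win last:
settings.offline_expected_qps = 1.0   # --qps 1
settings.min_duration_ms = 60 * 1000  # --time 60
settings.max_duration_ms = 60 * 1000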
diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive/_cm.yaml new file mode 100644 index 000000000..cfb22101f --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive/_cm.yaml @@ -0,0 +1,287 @@ +alias: app-mlperf-automotive +uid: f7488ce376484fd2 + +automation_alias: script +automation_uid: 5b4e0237da074764 + +category: "Modular MLPerf inference benchmark pipeline for ABTF model" + + +# User-friendly tags to find this CM script +tags: +- app +- app-mlperf-inference +- mlperf-inference +- abtf-inference + +predeps: no + +# Default environment +default_env: + CM_MLPERF_LOADGEN_MODE: accuracy + CM_MLPERF_LOADGEN_SCENARIO: Offline + CM_OUTPUT_FOLDER_NAME: test_results + CM_MLPERF_RUN_STYLE: test + CM_TEST_QUERY_COUNT: '10' + CM_MLPERF_QUANTIZATION: off + CM_MLPERF_SUT_NAME_IMPLEMENTATION_PREFIX: reference + CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX: '' + + +# Map script inputs to environment variables +input_mapping: + device: CM_MLPERF_DEVICE + count: CM_MLPERF_LOADGEN_QUERY_COUNT + docker: CM_RUN_DOCKER_CONTAINER + hw_name: CM_HW_NAME + imagenet_path: IMAGENET_PATH + max_batchsize: CM_MLPERF_LOADGEN_MAX_BATCHSIZE + mode: CM_MLPERF_LOADGEN_MODE + num_threads: CM_NUM_THREADS + threads: CM_NUM_THREADS + dataset: CM_MLPERF_VISION_DATASET_OPTION + model: CM_MLPERF_CUSTOM_MODEL_PATH + output_dir: OUTPUT_BASE_DIR + power: CM_MLPERF_POWER + power_server: CM_MLPERF_POWER_SERVER_ADDRESS + ntp_server: CM_MLPERF_POWER_NTP_SERVER + max_amps: CM_MLPERF_POWER_MAX_AMPS + max_volts: CM_MLPERF_POWER_MAX_VOLTS + regenerate_files: CM_REGENERATE_MEASURE_FILES + rerun: CM_RERUN + scenario: CM_MLPERF_LOADGEN_SCENARIO + test_query_count: CM_TEST_QUERY_COUNT + clean: CM_MLPERF_CLEAN_SUBMISSION_DIR + dataset_args: CM_MLPERF_EXTRA_DATASET_ARGS + target_qps: CM_MLPERF_LOADGEN_TARGET_QPS + target_latency: CM_MLPERF_LOADGEN_TARGET_LATENCY + offline_target_qps: CM_MLPERF_LOADGEN_OFFLINE_TARGET_QPS + server_target_qps: CM_MLPERF_LOADGEN_SERVER_TARGET_QPS + singlestream_target_latency: CM_MLPERF_LOADGEN_SINGLESTREAM_TARGET_LATENCY + multistream_target_latency: CM_MLPERF_LOADGEN_MULTISTREAM_TARGET_LATENCY + output: CM_MLPERF_OUTPUT_DIR + +# Env keys which are exposed to higher level scripts +new_env_keys: + - CM_MLPERF_* + - CM_OUTPUT_PREDICTIONS_PATH + +new_state_keys: + - cm-mlperf-inference-results* + +# Dependencies on other CM scripts +deps: + + # Detect host OS features + - tags: detect,os + + # Detect host CPU features + - tags: detect,cpu + + # Install system dependencies on a given host + - tags: get,sys-utils-cm + + # Detect/install python + - tags: get,python + names: + - python + - python3 + + # Use cmind inside CM scripts + - tags: get,generic-python-lib,_package.cmind + + - tags: get,mlperf,inference,utils + + +docker: + cm_repo: gateoverflow@cm4mlops + use_host_group_id: True + use_host_user_id: True + real_run: false + interactive: True + cm_repos: 'cm pull repo mlcommons@cm4abtf --checkout=poc' + deps: + - tags: get,abtf,scratch,space + mounts: + - "${{ CM_ABTF_SCRATCH_PATH_DATASETS }}:${{ CM_ABTF_SCRATCH_PATH_DATASETS }}" + + +# Variations to customize dependencies +variations: + + # Implementation + mlcommons-python: + group: implementation + default: true + env: + CM_MLPERF_PYTHON: 'yes' + CM_MLPERF_IMPLEMENTATION: reference + prehook_deps: + - names: + - python-reference-abtf-inference + - abtf-inference-implementation + tags: run-mlperf-inference,demo,abtf-model + skip_if_env: + CM_SKIP_RUN: + - yes + + + # 
Execution modes + fast: + group: execution-mode + env: + CM_FAST_FACTOR: '5' + CM_OUTPUT_FOLDER_NAME: fast_results + CM_MLPERF_RUN_STYLE: fast + + test: + group: execution-mode + default: true + env: + CM_OUTPUT_FOLDER_NAME: test_results + CM_MLPERF_RUN_STYLE: test + + valid: + group: execution-mode + env: + CM_OUTPUT_FOLDER_NAME: valid_results + CM_MLPERF_RUN_STYLE: valid + + + # ML engine + onnxruntime: + group: framework + env: + CM_MLPERF_BACKEND: onnxruntime + add_deps_recursive: + abtf-inference-implementation: + tags: _onnxruntime + + + onnxruntime,cpu: + env: + CM_MLPERF_BACKEND_VERSION: <<>> + + onnxruntime,cuda: + env: + CM_MLPERF_BACKEND_VERSION: <<>> + ONNXRUNTIME_PREFERRED_EXECUTION_PROVIDER: "CUDAExecutionProvider" + + + pytorch: + group: framework + default: true + env: + CM_MLPERF_BACKEND: pytorch + CM_MLPERF_BACKEND_VERSION: <<>> + add_deps_recursive: + abtf-inference-implementation: + tags: _pytorch + + + abtf-demo-model: + env: + CM_MODEL: retinanet + group: models + add_deps_recursive: + abtf-inference-implementation: + tags: _abtf-demo-model + + abtf-poc-model: + env: + CM_MODEL: retinanet + default: true + group: models + add_deps_recursive: + abtf-inference-implementation: + tags: _abtf-poc-model + docker: + deps: + - tags: get,dataset,raw,mlcommons-cognata,_abtf-poc + names: + - raw-dataset-mlcommons-cognata + enable_if_env: + CM_DATASET_MLCOMMONS_COGNATA_DOWNLOAD_IN_HOST: + - yes + + mounts: + - "${{ CM_DATASET_MLCOMMONS_COGNATA_PATH }}:${{ CM_DATASET_MLCOMMONS_COGNATA_PATH }}" + + + # Target devices + cpu: + group: device + default: true + env: + CM_MLPERF_DEVICE: cpu + CUDA_VISIBLE_DEVICES: '' + USE_CUDA: no + USE_GPU: no + add_deps_recursive: + abtf-inference-implementation: + tags: _cpu + + cuda: + group: device + env: + CM_MLPERF_DEVICE: gpu + USE_CUDA: yes + USE_GPU: yes + add_deps_recursive: + abtf-inference-implementation: + tags: _cuda + docker: + all_gpus: 'yes' + base_image: nvcr.io/nvidia/pytorch:24.03-py3 + + + + # Loadgen scenarios + offline: + env: + CM_MLPERF_LOADGEN_SCENARIO: Offline + add_deps_recursive: + abtf-inference-implementation: + tags: _offline + multistream: + env: + CM_MLPERF_LOADGEN_SCENARIO: MultiStream + add_deps_recursive: + abtf-inference-implementation: + tags: _multistream + singlestream: + group: loadgen-scenario + default: true + env: + CM_MLPERF_LOADGEN_SCENARIO: SingleStream + add_deps_recursive: + abtf-inference-implementation: + tags: _singlestream + server: + env: + CM_MLPERF_LOADGEN_SCENARIO: Server + add_deps_recursive: + abtf-inference-implementation: + tags: _server + + mvp-demo: + env: + CM_ABTF_MVP_DEMO: yes + CM_MLPERF_VISION_DATASET_OPTION: cognata-8mp-pt + CM_ABTF_ML_MODEL_CONFIG: baseline_8MP_ss_scales_all + CM_ABTF_NUM_CLASSES: 15 + CM_DATASET_MLCOMMONS_COGNATA_SERIAL_NUMBERS: 10002_Urban_Clear_Morning + CM_DATASET_MLCOMMONS_COGNATA_GROUP_NAMES: Cognata_Camera_01_8M + CM_ABTF_ML_MODEL_TRAINING_FORCE_COGNATA_LABELS: 'yes' + CM_ABTF_ML_MODEL_SKIP_WARMUP: 'yes' + + poc-demo: + env: + CM_ABTF_POC_DEMO: yes + CM_MLPERF_VISION_DATASET_OPTION: cognata-8mp-pt + CM_ABTF_ML_MODEL_CONFIG: baseline_8MP_ss_scales_fm1_5x5_all + CM_ABTF_NUM_CLASSES: 15 + CM_DATASET_MLCOMMONS_COGNATA_SERIAL_NUMBERS: 10002_Urban_Clear_Morning + CM_DATASET_MLCOMMONS_COGNATA_GROUP_NAMES: Cognata_Camera_01_8M + CM_ABTF_ML_MODEL_TRAINING_FORCE_COGNATA_LABELS: 'yes' + CM_ABTF_ML_MODEL_SKIP_WARMUP: 'yes' diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive/customize.py b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive/customize.py 
new file mode 100644 index 000000000..070f2b3c1 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive/customize.py @@ -0,0 +1,103 @@ +from cmind import utils +import os +import json +import shutil +import subprocess +import mlperf_utils +from log_parser import MLPerfLog + + +def preprocess(i): + + os_info = i['os_info'] + env = i['env'] + state = i['state'] + script_path = i['run_script_input']['path'] + + if 'cmd' in i['input']: + state['mlperf_inference_run_cmd'] = "cm run script " + \ + " ".join(i['input']['cmd']) + + state['mlperf-inference-implementation'] = {} + + run_state = i['run_script_input']['run_state'] + state['mlperf-inference-implementation']['script_id'] = run_state['script_id'] + \ + ":" + ",".join(run_state['script_variation_tags']) + + return {'return': 0} + + +def postprocess(i): + + env = i['env'] + state = i['state'] + + inp = i['input'] + os_info = i['os_info'] + + xsep = '^' if os_info['platform'] == 'windows' else '\\' + + env['CMD'] = '' + + # if env.get('CM_MLPERF_USER_CONF', '') == '': + # return {'return': 0} + + output_dir = env['CM_MLPERF_OUTPUT_DIR'] + mode = env['CM_MLPERF_LOADGEN_MODE'] + + model = env['CM_MODEL'] + model_full_name = env.get('CM_ML_MODEL_FULL_NAME', model) + + scenario = env['CM_MLPERF_LOADGEN_SCENARIO'] + + if not os.path.exists(output_dir) or not os.path.exists( + os.path.join(output_dir, "mlperf_log_summary.txt")): + # No output, fake_run? + return {'return': 0} + + mlperf_log = MLPerfLog(os.path.join(output_dir, "mlperf_log_detail.txt")) + if mode == "performance": + result = mlperf_log['result_mean_latency_ns'] / 1000000 + elif mode == "accuracy": + if not env.get( + 'CM_COGNATA_ACCURACY_DUMP_FILE'): # can happen while reusing old runs + env['CM_COGNATA_ACCURACY_DUMP_FILE'] = os.path.join( + output_dir, "accuracy.txt") + acc = "" + if os.path.exists(env['CM_COGNATA_ACCURACY_DUMP_FILE']): + with open(env['CM_COGNATA_ACCURACY_DUMP_FILE'], "r") as f: + acc = f.readline() + result = acc + else: + return {'return': 1, 'error': f"Unknown mode {mode}"} + + valid = {'performance': True, 'accuracy': True} # it's a POC + power_result = None # No power measurement in POC + + # result, valid, power_result = mlperf_utils.get_result_from_log(env['CM_MLPERF_LAST_RELEASE'], model, scenario, output_dir, mode) + + if not state.get('cm-mlperf-inference-results'): + state['cm-mlperf-inference-results'] = {} + if not state.get('cm-mlperf-inference-results-last'): + state['cm-mlperf-inference-results-last'] = {} + if not state['cm-mlperf-inference-results'].get( + state['CM_SUT_CONFIG_NAME']): + state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME']] = {} + if not state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME'] + ].get(model): + state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME']][model] = {} + if not state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME'] + ][model].get(scenario): + state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME'] + ][model][scenario] = {} + + state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME'] + ][model][scenario][mode] = result + state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME'] + ][model][scenario][mode + '_valid'] = valid.get(mode, False) + + state['cm-mlperf-inference-results-last'][mode] = result + state['cm-mlperf-inference-results-last'][mode + + '_valid'] = valid.get(mode, False) + + return {'return': 0} diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-amd/_cm.yaml
b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-amd/_cm.yaml index 305578a17..f073011f8 100644 --- a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-amd/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-amd/_cm.yaml @@ -38,7 +38,7 @@ input_mapping: mlperf_conf: CM_MLPERF_CONF mode: CM_MLPERF_LOADGEN_MODE output_dir: CM_MLPERF_OUTPUT_DIR - performance_sample_count: CM_MLPERF_PERFORMANCE_SAMPLE_COUNT + performance_sample_count: CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT scenario: CM_MLPERF_LOADGEN_SCENARIO user_conf: CM_MLPERF_USER_CONF skip_preprocess: CM_SKIP_PREPROCESS_DATASET diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-dummy/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-dummy/_cm.yaml index a1f311cc7..1343835b6 100644 --- a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-dummy/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-dummy/_cm.yaml @@ -38,7 +38,7 @@ input_mapping: mlperf_conf: CM_MLPERF_CONF mode: CM_MLPERF_LOADGEN_MODE output_dir: CM_MLPERF_OUTPUT_DIR - performance_sample_count: CM_MLPERF_PERFORMANCE_SAMPLE_COUNT + performance_sample_count: CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT scenario: CM_MLPERF_LOADGEN_SCENARIO user_conf: CM_MLPERF_USER_CONF skip_preprocess: CM_SKIP_PREPROCESS_DATASET diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-intel/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-intel/_cm.yaml index 0975f0b0b..9a7c042d7 100644 --- a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-intel/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-intel/_cm.yaml @@ -48,7 +48,7 @@ input_mapping: mlperf_conf: CM_MLPERF_CONF mode: CM_MLPERF_LOADGEN_MODE output_dir: CM_MLPERF_OUTPUT_DIR - performance_sample_count: CM_MLPERF_PERFORMANCE_SAMPLE_COUNT + performance_sample_count: CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT scenario: CM_MLPERF_LOADGEN_SCENARIO user_conf: CM_MLPERF_USER_CONF skip_preprocess: CM_SKIP_PREPROCESS_DATASET diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-mlcommons-python/_cm.yaml index 85fddc989..250d2dc86 100644 --- a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -26,10 +26,10 @@ default_env: CM_MLPERF_LOADGEN_SCENARIO: Offline CM_OUTPUT_FOLDER_NAME: test_results CM_MLPERF_RUN_STYLE: test - CM_TEST_QUERY_COUNT: '10' + CM_TEST_QUERY_COUNT: "10" CM_MLPERF_QUANTIZATION: off CM_MLPERF_SUT_NAME_IMPLEMENTATION_PREFIX: reference - CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX: '' + CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX: "" docker: real_run: False @@ -67,7 +67,6 @@ input_mapping: network: CM_NETWORK_LOADGEN sut_servers: CM_NETWORK_LOADGEN_SUT_SERVERS - # Duplicate CM environment variables to the ones used in native apps env_key_mappings: CM_HOST_: HOST_ @@ -89,7 +88,6 @@ new_state_keys: # Dependencies on other CM scripts deps: - # Detect host OS features - tags: detect,os @@ -102,8 +100,8 @@ deps: # Detect/install python - tags: get,python names: - - python - - python3 + - python + - python3 # Detect CUDA if required - tags: get,cuda,_cudnn @@ -111,21 +109,18 @@ deps: - cuda enable_if_env: CM_MLPERF_DEVICE: - - gpu + - gpu CM_MLPERF_BACKEND: - - onnxruntime - - tf - - tflite - - pytorch + - onnxruntime + - tf + - tflite + - pytorch # Detect TensorRT if required - tags: get,nvidia,tensorrt enable_if_env: 
CM_MLPERF_BACKEND: - - tensorrt - - - + - tensorrt ######################################################################## # Install ML engines via CM @@ -133,54 +128,54 @@ deps: ## Onnx CPU Runtime - tags: get,generic-python-lib,_onnxruntime names: - - ml-engine-onnxruntime - - onnxruntime + - ml-engine-onnxruntime + - onnxruntime enable_if_env: CM_MLPERF_BACKEND: - - onnxruntime - - tvm-onnx + - onnxruntime + - tvm-onnx CM_MLPERF_DEVICE: - - cpu - - rocm + - cpu + - rocm ## Onnx CUDA Runtime - tags: get,generic-python-lib,_onnxruntime_gpu names: - - ml-engine-onnxruntime-cuda + - ml-engine-onnxruntime-cuda enable_if_env: CM_MLPERF_BACKEND: - - onnxruntime - - tvm-onnx + - onnxruntime + - tvm-onnx CM_MLPERF_DEVICE: - - gpu + - gpu skip_if_env: CM_MODEL: - - 3d-unet-99 - - 3d-unet-99.9 + - 3d-unet-99 + - 3d-unet-99.9 ## resnet50 and 3d-unet need both onnxruntime and onnxruntime_gpu on cuda - tags: get,generic-python-lib,_onnxruntime enable_if_env: CM_MLPERF_BACKEND: - - onnxruntime + - onnxruntime CM_MLPERF_DEVICE: - - gpu + - gpu CM_MODEL: - - 3d-unet-99 - - 3d-unet-99.9 - - resnet50 + - 3d-unet-99 + - 3d-unet-99.9 + - resnet50 - tags: get,generic-python-lib,_onnxruntime_gpu env: CM_GENERIC_PYTHON_PIP_UNINSTALL_DEPS: "" enable_if_env: CM_MLPERF_BACKEND: - - onnxruntime + - onnxruntime CM_MLPERF_DEVICE: - - gpu + - gpu CM_MODEL: - - 3d-unet-99 - - 3d-unet-99.9 - - resnet50 + - 3d-unet-99 + - 3d-unet-99.9 + - resnet50 ## Pytorch (CPU) - tags: get,generic-python-lib,_torch @@ -194,74 +189,80 @@ deps: - dlrm-v2-99.9 enable_if_env: CM_MLPERF_BACKEND: - - pytorch - - tvm-pytorch + - pytorch + - tvm-pytorch CM_MLPERF_DEVICE: - - cpu - - rocm + - cpu + - rocm ## Pytorch (CUDA) - tags: get,generic-python-lib,_torch_cuda names: - - ml-engine-pytorch - - pytorch + - ml-engine-pytorch + - pytorch enable_if_env: CM_MLPERF_BACKEND: - - pytorch - - tvm-pytorch - - ray + - pytorch + - tvm-pytorch + - ray CM_MLPERF_DEVICE: - - gpu + - gpu ## Torchvision (CPU) - tags: get,generic-python-lib,_torchvision names: - - ml-engine-torchvision - - torchvision + - ml-engine-torchvision + - torchvision skip_if_env: CM_MODEL: - dlrm-v2-99 - dlrm-v2-99.9 + - rgat enable_if_env: CM_MLPERF_BACKEND: - - pytorch - - tvm-pytorch + - pytorch + - tvm-pytorch CM_MLPERF_DEVICE: - - cpu + - cpu ## Torchvision (CUDA) - tags: get,generic-python-lib,_torchvision_cuda names: - - ml-engine-torchvision - - torchvision + - ml-engine-torchvision + - torchvision + skip_if_env: + CM_MODEL: + - dlrm-v2-99 + - dlrm-v2-99.9 + - rgat enable_if_env: CM_MLPERF_BACKEND: - - pytorch - - tvm-pytorch - - ray + - pytorch + - tvm-pytorch + - ray CM_MLPERF_DEVICE: - - gpu + - gpu ## tensorrt - tags: get,generic-python-lib,_tensorrt names: - - ml-engine-tensorrt + - ml-engine-tensorrt enable_if_env: CM_MLPERF_BACKEND: - - ray + - ray ## torch_tensorrt - tags: get,generic-python-lib,_torch_tensorrt names: - - ml-engine-torch_tensorrt + - ml-engine-torch_tensorrt enable_if_env: CM_MLPERF_BACKEND: - - ray + - ray ## Ray - tags: get,generic-python-lib,_ray names: - - ray + - ray enable_if_env: CM_MLPERF_BACKEND: - ray @@ -271,7 +272,7 @@ deps: # async_timeout to be installed, so we need to install it manually. 
- tags: get,generic-python-lib,_async_timeout names: - - async_timeout + - async_timeout enable_if_env: CM_MLPERF_BACKEND: - ray @@ -279,49 +280,48 @@ deps: ## Transformers - tags: get,generic-python-lib,_transformers names: - - ml-engine-transformers + - ml-engine-transformers enable_if_env: CM_MODEL: - - bert-99 - - bert-99.9 - - gptj-99 - - gptj-99.9 + - bert-99 + - bert-99.9 + - gptj-99 + - gptj-99.9 ## Tensorflow - tags: get,generic-python-lib,_tensorflow names: - - ml-engine-tensorflow - - tensorflow + - ml-engine-tensorflow + - tensorflow enable_if_env: CM_MLPERF_BACKEND: - - tf + - tf ## NCNN - tags: get,generic-python-lib,_package.ncnn names: - - ml-engine-ncnn + - ml-engine-ncnn enable_if_env: CM_MLPERF_BACKEND: - - ncnn - + - ncnn + - tags: get,tensorflow,lib,_tflite names: - - ml-engine-tflite + - ml-engine-tflite enable_if_env: CM_MLPERF_BACKEND: - - tflite - + - tflite ######################################################################## - # Install ML models + # Install ML models - tags: get,ml-model,neural-magic,zoo # sets CM_MLPERF_CUSTOM_MODEL_PATH names: - - custom-ml-model + - custom-ml-model enable_if_env: CM_MLPERF_NEURALMAGIC_MODEL_ZOO_STUB: - - "on" + - "on" update_tags_from_env_with_prefix: "_model-stub.": - CM_MLPERF_NEURALMAGIC_MODEL_ZOO_STUB @@ -329,93 +329,91 @@ deps: ## ResNet50 - tags: get,ml-model,image-classification,resnet50 names: - - ml-model - - resnet50-model + - ml-model + - resnet50-model enable_if_env: CM_MODEL: - - resnet50 + - resnet50 skip_if_env: CM_MLPERF_CUSTOM_MODEL_PATH: - - "on" + - "on" ## RetinaNet - tags: get,ml-model,object-detection,retinanet names: - - ml-model - - retinanet-model + - ml-model + - retinanet-model enable_if_env: CM_MODEL: - - retinanet + - retinanet ## GPT-J - tags: get,ml-model,large-language-model,gptj names: - - ml-model - - gptj-model - - gpt-j-model + - ml-model + - gptj-model + - gpt-j-model enable_if_env: CM_MODEL: - - gptj-99 - - gptj-99.9 + - gptj-99 + - gptj-99.9 skip_if_env: CM_NETWORK_LOADGEN: - - lon - - + - lon ## RetinaNet (PyTorch weights, FP32) - tags: get,ml-model,object-detection,resnext50,fp32,_pytorch-weights names: - - ml-model - - retinanet-model + - ml-model + - retinanet-model enable_if_env: CM_MLPERF_BACKEND: - - pytorch + - pytorch CM_MLPERF_IMPLEMENTATION: - - nvidia + - nvidia CM_MODEL: - - retinanet + - retinanet ## BERT - tags: get,ml-model,language-processing,bert-large names: - - ml-model - - bert-model + - ml-model + - bert-model enable_if_env: CM_MODEL: - - bert-99 - - bert-99.9 + - bert-99 + - bert-99.9 skip_if_env: CM_MLPERF_CUSTOM_MODEL_PATH: - - "on" + - "on" ## SDXL - tags: get,ml-model,stable-diffusion,text-to-image,sdxl names: - - ml-model - - sdxl-model - - ml-model-float16 + - ml-model + - sdxl-model + - ml-model-float16 enable_if_env: CM_MODEL: - - stable-diffusion-xl + - stable-diffusion-xl skip_if_any_env: CM_MLPERF_CUSTOM_MODEL_PATH: - - "on" + - "on" skip_if_env: CM_RUN_STATE_DOCKER: - - 'yes' + - "yes" CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST: - - 'yes' + - "yes" ## LLAMA2-70B - tags: get,ml-model,llama2 names: - - ml-model - - llama2-model + - ml-model + - llama2-model enable_if_env: CM_MODEL: - - llama2-70b-99 - - llama2-70b-99.9 + - llama2-70b-99 + - llama2-70b-99.9 skip_if_any_env: CM_MLPERF_CUSTOM_MODEL_PATH: - "on" @@ -423,73 +421,86 @@ deps: - "on" skip_if_env: CM_MLPERF_MODEL_LLAMA2_70B_DOWNLOAD_TO_HOST: - - 'yes' + - "yes" CM_RUN_STATE_DOCKER: - - 'yes' + - "yes" ## mixtral-8x7b - tags: get,ml-model,mixtral names: - - ml-model - - mixtral-model + - ml-model + - 
mixtral-model enable_if_env: CM_MODEL: - - mixtral-8x7b + - mixtral-8x7b skip_if_any_env: CM_MLPERF_CUSTOM_MODEL_PATH: - - "on" + - "on" skip_if_env: CM_MLPERF_MODEL_MIXTRAL_8X7B_DOWNLOAD_TO_HOST: - - 'yes' + - "yes" CM_RUN_STATE_DOCKER: - - 'yes' + - "yes" ## 3d-unet - tags: get,ml-model,medical-imaging,3d-unet names: - - ml-model - - 3d-unet-model + - ml-model + - 3d-unet-model enable_if_env: CM_MODEL: - - 3d-unet-99 - - 3d-unet-99.9 + - 3d-unet-99 + - 3d-unet-99.9 ## Rnnt - tags: get,ml-model,speech-recognition,rnnt names: - - ml-model - - rnnt-model + - ml-model + - rnnt-model enable_if_env: CM_MODEL: - - rnnt + - rnnt ## Dlrm - tags: get,ml-model,recommendation,dlrm names: - - ml-model - - dlrm-model + - ml-model + - dlrm-model enable_if_env: CM_MODEL: - - dlrm-99 - - dlrm-99.9 - - dlrm-v2-99 - - dlrm-v2-99.9 + - dlrm-99 + - dlrm-99.9 + - dlrm-v2-99 + - dlrm-v2-99.9 skip_if_env: CM_ML_MODEL_FILE_WITH_PATH: - - 'on' - + - "on" ## RGAT - tags: get,ml-model,rgat names: - - ml-model - - rgat-model + - rgat-model enable_if_env: CM_MODEL: - - rgat + - rgat skip_if_env: RGAT_CHECKPOINT_PATH: - - 'on' + - "on" + + ## LLAMA3_1-405B + - tags: get,ml-model,llama3 + names: + - llama3-405b-model + - llama3-402b-model + enable_if_env: + CM_MODEL: + - llama3_1-405b + - llama3-405b + skip_if_env: + CM_USE_MODEL_FROM_HOST: + - "yes" + CM_RUN_STATE_DOCKER: + - "yes" ######################################################################## # Install datasets @@ -497,129 +508,153 @@ deps: ## ImageNet (small for tests) - tags: get,dataset,image-classification,imagenet,preprocessed names: - - imagenet-preprocessed + - imagenet-preprocessed enable_if_env: CM_MODEL: - - resnet50 + - resnet50 skip_if_env: CM_MLPERF_VISION_DATASET_OPTION: - - on + - on - tags: get,dataset,image-classification,imagenet,preprocessed,_pytorch names: - - imagenet-preprocessed + - imagenet-preprocessed enable_if_env: CM_MODEL: - - resnet50 + - resnet50 CM_MLPERF_VISION_DATASET_OPTION: - - imagenet_pytorch + - imagenet_pytorch - tags: get,dataset-aux,image-classification,imagenet-aux enable_if_env: CM_MODEL: - - resnet50 + - resnet50 ## Open Images for RetinaNet - tags: get,dataset,object-detection,open-images,openimages,preprocessed,_validation names: - - openimages-preprocessed + - openimages-preprocessed enable_if_env: CM_MODEL: - - retinanet + - retinanet ## CNNDM for Large Language Model - tags: get,dataset,cnndm,_validation names: - - cnndm-original + - cnndm-original enable_if_env: CM_MODEL: - - gptj-99 - - gptj-99.9 + - gptj-99 + - gptj-99.9 ## Squad for BERT - tags: get,dataset,squad,original names: - - squad-original + - squad-original enable_if_env: CM_MODEL: - - bert-99 - - bert-99.9 + - bert-99 + - bert-99.9 - tags: get,dataset-aux,squad-vocab enable_if_env: CM_MODEL: - - bert-99 - - bert-99.9 + - bert-99 + - bert-99.9 ## COCO for SDXL - tags: get,dataset,coco2014,_validation names: - - coco2014-preprocessed - - coco2014-dataset + - coco2014-preprocessed + - coco2014-dataset enable_if_env: CM_MODEL: - - stable-diffusion-xl + - stable-diffusion-xl ## OpenOrca for LLAMA2-70b - tags: get,preprocessed,dataset,openorca,_validation,_mlcommons names: - - openorca-preprocessed + - openorca-preprocessed enable_if_env: CM_MODEL: - - llama2-70b-99 - - llama2-70b-99.9 + - llama2-70b-99 + - llama2-70b-99.9 ## OpenOrca,mbxp,gsm8k combined dataset for mixtral-8x7b - tags: get,dataset-mixtral,openorca-mbxp-gsm8k-combined names: - - openorca-mbxp-gsm8k-combined-preprocessed + - openorca-mbxp-gsm8k-combined-preprocessed enable_if_env: 
CM_MODEL: - - mixtral-8x7b + - mixtral-8x7b skip_if_env: CM_MLPERF_DATASET_MIXTRAL_8X7B_DOWNLOAD_TO_HOST: - - 'yes' + - "yes" + CM_RUN_STATE_DOCKER: + - "yes" ## Kits19 for 3d-unet - tags: get,dataset,kits19,preprocessed names: - - kits19-preprocessed + - kits19-preprocessed enable_if_env: CM_MODEL: - - 3d-unet-99 - - 3d-unet-99.9 + - 3d-unet-99 + - 3d-unet-99.9 skip_if_env: CM_MLPERF_DATASET_3DUNET_DOWNLOAD_TO_HOST: - - 'yes' + - "yes" + CM_RUN_STATE_DOCKER: + - "yes" ## Librispeech for rnnt - tags: get,dataset,librispeech,preprocessed names: - - librispeech-preprocessed + - librispeech-preprocessed enable_if_env: CM_MODEL: - - rnnt + - rnnt ## Criteo for dlrm - tags: get,dataset,criteo,preprocessed,_mlc names: - - criteo-preprocessed + - criteo-preprocessed enable_if_env: CM_MODEL: - - dlrm-v2-99 - - dlrm-v2-99.9 + - dlrm-v2-99 + - dlrm-v2-99.9 skip_if_env: CM_CRITEO_PREPROCESSED_PATH: - - on + - on ## igbh for rgat - tags: get,dataset,mlperf,inference,igbh names: - - igbh-dataset - - illinois-graph-benchmark-heterogeneous + - igbh-dataset + - illinois-graph-benchmark-heterogeneous enable_if_env: CM_MODEL: - - rgat + - rgat + skip_if_env: + CM_RUN_STATE_DOCKER: + - "yes" + CM_USE_DATASET_FROM_HOST: + - "yes" + + ## llama3_1 dataset + - tags: get,dataset,mlperf,inference,llama3,_validation + names: + - llama3_1-dataset + - llama3-dataset + enable_if_env: + CM_MODEL: + - llama3_1-405b + - llama3-402b + skip_if_env: + CM_USE_DATASET_FROM_HOST: + - "yes" + CM_RUN_STATE_DOCKER: + - "yes" ######################################################################## # Install MLPerf inference dependencies @@ -627,47 +662,46 @@ deps: # Creates user conf for given SUT - tags: generate,user-conf,mlperf,inference names: - - user-conf-generator + - user-conf-generator skip_if_env: CM_RUN_STATE_DOCKER: - - 'yes' + - "yes" # Install MLPerf loadgen - tags: get,loadgen names: - - loadgen - - mlperf-inference-loadgen + - loadgen + - mlperf-inference-loadgen # Download MLPerf inference source - tags: get,mlcommons,inference,src names: - - inference-src - + - inference-src # Download MLPerf inference source - tags: get,mlcommons,inference,src env: - CM_GET_MLPERF_IMPLEMENTATION_ONLY: 'yes' + CM_GET_MLPERF_IMPLEMENTATION_ONLY: "yes" names: - - mlperf-implementation + - mlperf-implementation - tags: get,generic-python-lib,_package.psutil prehook_deps: - names: - - remote-run-cmds + - remote-run-cmds tags: remote,run,cmds enable_if_env: CM_ASSH_RUN_COMMANDS: - - "on" + - "on" -posthook_deps: +posthook_deps: - names: - - mlperf-runner + - mlperf-runner tags: benchmark-mlperf skip_if_env: CM_MLPERF_SKIP_RUN: - - "on" + - "on" post_deps: - tags: save,mlperf,inference,state @@ -683,7 +717,7 @@ variations: imagenet-accuracy-script: tags: _float32 env: - CM_MLPERF_PYTHON: 'yes' + CM_MLPERF_PYTHON: "yes" CM_MLPERF_IMPLEMENTATION: reference # ML engine @@ -807,14 +841,14 @@ variations: CM_MLPERF_BACKEND: deepsparse CM_MLPERF_BACKEND_VERSION: <<>> deps: - - tags: get,generic-python-lib,_deepsparse - skip_if_env: - CM_HOST_PLATFORM_FLAVOR: - - aarch64 - - tags: get,generic-python-lib,_package.deepsparse-nightly - enable_if_env: - CM_HOST_PLATFORM_FLAVOR: - - aarch64 + - tags: get,generic-python-lib,_deepsparse + skip_if_env: + CM_HOST_PLATFORM_FLAVOR: + - aarch64 + - tags: get,generic-python-lib,_package.deepsparse-nightly + enable_if_env: + CM_HOST_PLATFORM_FLAVOR: + - aarch64 add_deps_recursive: mlperf-implementation: version: deepsparse @@ -827,92 +861,91 @@ variations: CM_MLPERF_BACKEND: tvm-onnx 
CM_MLPERF_BACKEND_VERSION: <<>> deps: - - tags: get,generic-python-lib,_onnx - - tags: get,generic-python-lib,_numpy - version_max: "1.26.4" - version_max_usable: "1.26.4" - - tags: get,tvm - names: - - tvm - - tags: get,tvm-model,_onnx - names: - - tvm-model - update_tags_from_env_with_prefix: - _model.: - - CM_MODEL - - + - tags: get,generic-python-lib,_onnx + - tags: get,generic-python-lib,_numpy + version_max: "1.26.4" + version_max_usable: "1.26.4" + - tags: get,tvm + names: + - tvm + - tags: get,tvm-model,_onnx + names: + - tvm-model + update_tags_from_env_with_prefix: + _model.: + - CM_MODEL + tvm-tflite: group: framework env: CM_MLPERF_BACKEND: tvm-tflite CM_MLPERF_BACKEND_VERSION: <<>> deps: - - tags: get,generic-python-lib,_tflite - - tags: get,tvm - names: - - tvm - - tags: get,tvm-model,_tflite - names: - - tvm-model - update_tags_from_env_with_prefix: - _model.: - - CM_MODEL + - tags: get,generic-python-lib,_tflite + - tags: get,tvm + names: + - tvm + - tags: get,tvm-model,_tflite + names: + - tvm-model + update_tags_from_env_with_prefix: + _model.: + - CM_MODEL tvm-pytorch: group: framework env: CM_MLPERF_BACKEND: tvm-pytorch CM_MLPERF_BACKEND_VERSION: <<>> - CM_PREPROCESS_PYTORCH: 'yes' + CM_PREPROCESS_PYTORCH: "yes" MLPERF_TVM_TORCH_QUANTIZED_ENGINE: qnnpack deps: - - tags: get,generic-python-lib,_torch - names: - - torch - - pytorch - - tags: get,tvm - names: - - tvm - - tags: get,tvm-model,_pytorch - names: - - tvm-model - update_tags_from_env_with_prefix: - _model.: - - CM_MODEL + - tags: get,generic-python-lib,_torch + names: + - torch + - pytorch + - tags: get,tvm + names: + - tvm + - tags: get,tvm-model,_pytorch + names: + - tvm-model + update_tags_from_env_with_prefix: + _model.: + - CM_MODEL # Reference MLPerf models gptj-99.9: group: models base: - - gptj_ + - gptj_ env: CM_MODEL: gptj-99.9 gptj-99: group: models base: - - gptj_ + - gptj_ env: CM_MODEL: gptj-99 gptj_: deps: - - tags: get,generic-python-lib,_package.datasets - - tags: get,generic-python-lib,_package.attrs - - tags: get,generic-python-lib,_package.accelerate + - tags: get,generic-python-lib,_package.datasets + - tags: get,generic-python-lib,_package.attrs + - tags: get,generic-python-lib,_package.accelerate bert-99.9: group: models base: - - bert + - bert env: CM_MODEL: bert-99.9 bert-99: group: models base: - - bert + - bert env: CM_MODEL: bert-99 @@ -920,29 +953,29 @@ variations: env: CM_MLPERF_MODEL_SKIP_BATCHING: true deps: - - tags: get,generic-python-lib,_package.pydantic - - tags: get,generic-python-lib,_tokenization - - tags: get,generic-python-lib,_six - - tags: get,generic-python-lib,_package.absl-py - - tags: get,generic-python-lib,_protobuf - names: - - protobuf - version_max: "3.19" - enable_if_env: - CM_MLPERF_BACKEND: - - tf - - tflite - - tags: get,generic-python-lib,_boto3 - enable_if_env: - CM_MLPERF_BACKEND: - - pytorch - - tags: get,generic-python-lib,_torch - names: - - ml-engine-pytorch - - pytorch - skip_if_env: - CM_MLPERF_DEVICE: - - gpu + - tags: get,generic-python-lib,_package.pydantic + - tags: get,generic-python-lib,_tokenization + - tags: get,generic-python-lib,_six + - tags: get,generic-python-lib,_package.absl-py + - tags: get,generic-python-lib,_protobuf + names: + - protobuf + version_max: "3.19" + enable_if_env: + CM_MLPERF_BACKEND: + - tf + - tflite + - tags: get,generic-python-lib,_boto3 + enable_if_env: + CM_MLPERF_BACKEND: + - pytorch + - tags: get,generic-python-lib,_torch + names: + - ml-engine-pytorch + - pytorch + skip_if_env: + CM_MLPERF_DEVICE: + - gpu 
add_deps_recursive: inference-src: tags: _deeplearningexamples @@ -1087,7 +1120,7 @@ variations: - tags: get,generic-python-lib,_mxeval names: - rouge-score - + mixtral-8x7b,cuda: default_env: CM_MLPERF_LOADGEN_BATCH_SIZE: 1 @@ -1095,14 +1128,14 @@ variations: 3d-unet-99.9: group: models base: - - 3d-unet + - 3d-unet env: CM_MODEL: 3d-unet-99.9 3d-unet-99: group: models base: - - 3d-unet + - 3d-unet env: CM_MODEL: 3d-unet-99 @@ -1111,23 +1144,23 @@ variations: CM_TMP_IGNORE_MLPERF_QUERY_COUNT: true CM_MLPERF_MODEL_SKIP_BATCHING: true deps: - - tags: get,generic-python-lib,_package.nibabel - - tags: get,generic-python-lib,_package.scipy - names: - - scipy - version: 1.10.1 + - tags: get,generic-python-lib,_package.nibabel + - tags: get,generic-python-lib,_package.scipy + names: + - scipy + version: 1.10.1 dlrm-v2-99.9: group: models base: - - dlrm-v2_ + - dlrm-v2_ env: CM_MODEL: dlrm-v2-99.9 dlrm-v2-99: group: models base: - - dlrm-v2_ + - dlrm-v2_ env: CM_MODEL: dlrm-v2-99 @@ -1138,34 +1171,33 @@ variations: dlrm-v2_,pytorch: deps: - - tags: get,dlrm,src - names: - - dlrm-src - # to force the version - - tags: get,generic-python-lib,_torch - names: - - torch - - pytorch - - ml-engine-pytorch - - tags: get,generic-python-lib,_mlperf_logging - - tags: get,generic-python-lib,_opencv-python - - tags: get,generic-python-lib,_tensorboard - - tags: get,generic-python-lib,_protobuf - - tags: get,generic-python-lib,_scikit-learn - - tags: get,generic-python-lib,_tqdm - - tags: get,generic-python-lib,_onnx - - tags: get,generic-python-lib,_numpy - names: - - numpy - - tags: get,generic-python-lib,_package.pyre-extensions - - tags: get,generic-python-lib,_package.torchsnapshot - - tags: get,generic-python-lib,_package.torchmetrics - - tags: get,generic-python-lib,_package.torchrec - - tags: get,generic-python-lib,_package.fbgemm-gpu - - tags: get,generic-python-lib,_package.fbgemm-gpu-cpu - - tags: get,generic-python-lib,_package.fvcore - - tags: set,user,limit,_large-nofile - + - tags: get,dlrm,src + names: + - dlrm-src + # to force the version + - tags: get,generic-python-lib,_torch + names: + - torch + - pytorch + - ml-engine-pytorch + - tags: get,generic-python-lib,_mlperf_logging + - tags: get,generic-python-lib,_opencv-python + - tags: get,generic-python-lib,_tensorboard + - tags: get,generic-python-lib,_protobuf + - tags: get,generic-python-lib,_scikit-learn + - tags: get,generic-python-lib,_tqdm + - tags: get,generic-python-lib,_onnx + - tags: get,generic-python-lib,_numpy + names: + - numpy + - tags: get,generic-python-lib,_package.pyre-extensions + - tags: get,generic-python-lib,_package.torchsnapshot + - tags: get,generic-python-lib,_package.torchmetrics + - tags: get,generic-python-lib,_package.torchrec + - tags: get,generic-python-lib,_package.fbgemm-gpu + - tags: get,generic-python-lib,_package.fbgemm-gpu-cpu + - tags: get,generic-python-lib,_package.fvcore + - tags: set,user,limit,_large-nofile rnnt: group: models @@ -1174,77 +1206,128 @@ variations: CM_MLPERF_MODEL_SKIP_BATCHING: true CM_TMP_IGNORE_MLPERF_QUERY_COUNT: true deps: - - tags: get,generic-python-lib,_package.pydantic - version_max: "1.10.9" - - tags: get,generic-python-lib,_librosa - names: - - librosa - - tags: get,generic-python-lib,_inflect - - tags: get,generic-python-lib,_unidecode - - tags: get,generic-python-lib,_toml + - tags: get,generic-python-lib,_package.pydantic + version_max: "1.10.9" + - tags: get,generic-python-lib,_librosa + names: + - librosa + - tags: get,generic-python-lib,_inflect + - tags: 
get,generic-python-lib,_unidecode + - tags: get,generic-python-lib,_toml retinanet: group: models deps: - - tags: get,generic-python-lib,_opencv-python - - tags: get,generic-python-lib,_numpy - names: - - numpy - - tags: get,generic-python-lib,_pycocotools + - tags: get,generic-python-lib,_opencv-python + - tags: get,generic-python-lib,_numpy + names: + - numpy + - tags: get,generic-python-lib,_pycocotools env: CM_MODEL: retinanet - CM_MLPERF_USE_MLCOMMONS_RUN_SCRIPT: 'yes' - CM_MLPERF_LOADGEN_MAX_BATCHSIZE: '1' + CM_MLPERF_USE_MLCOMMONS_RUN_SCRIPT: "yes" + CM_MLPERF_LOADGEN_MAX_BATCHSIZE: "1" resnet50: group: models default: true env: CM_MODEL: resnet50 - CM_MLPERF_USE_MLCOMMONS_RUN_SCRIPT: 'yes' + CM_MLPERF_USE_MLCOMMONS_RUN_SCRIPT: "yes" deps: - - tags: get,generic-python-lib,_opencv-python - - tags: get,generic-python-lib,_numpy - names: - - numpy - - tags: get,generic-python-lib,_pycocotools + - tags: get,generic-python-lib,_opencv-python + - tags: get,generic-python-lib,_numpy + names: + - numpy + - tags: get,generic-python-lib,_pycocotools prehook_deps: - - tags: get,generic-python-lib,_protobuf - names: - - protobuf - version_max: "4.23.4" - version_max_usable: "4.23.4" - enable_if_env: - CM_MLPERF_BACKEND: - - tf - - tflite + - tags: get,generic-python-lib,_protobuf + names: + - protobuf + version_max: "4.23.4" + version_max_usable: "4.23.4" + enable_if_env: + CM_MLPERF_BACKEND: + - tf + - tflite rgat: group: models env: CM_MODEL: rgat + add_deps_recursive: + pytorch: + version_max: "2.4.0" + version_max_usable: "2.4.0" deps: - tags: get,generic-python-lib,_package.colorama - tags: get,generic-python-lib,_package.tqdm - tags: get,generic-python-lib,_package.requests - tags: get,generic-python-lib,_package.torchdata + - tags: get,generic-python-lib,_package.pybind11 + - tags: get,generic-python-lib,_package.PyYAML + - tags: get,generic-python-lib,_package.numpy + version_max: "1.26.4" + version_max_usable: "1.26.4" + - tags: get,generic-python-lib,_package.pydantic + - tags: get,generic-python-lib,_package.igb,_url.git+https://github.com/IllinoisGraphBenchmark/IGB-Datasets.git - tags: get,generic-python-lib,_package.torch-geometric + update_tags_from_env_with_prefix: + _find_links_url.: + - CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL - tags: get,generic-python-lib,_package.torch-scatter + update_tags_from_env_with_prefix: + _find_links_url.: + - CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL - tags: get,generic-python-lib,_package.torch-sparse + update_tags_from_env_with_prefix: + _find_links_url.: + - CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL + - tags: get,generic-python-lib,_package.dgl + update_tags_from_env_with_prefix: + _find_links_url.: + - CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL_DGL + + rgat,cuda: + env: + CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>.html" + CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL_DGL: "https://data.dgl.ai/wheels/torch-<<>>/cu121/repo.html" + + rgat,cpu: + env: + CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" + CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL_DGL: "https://data.dgl.ai/wheels/torch-<<>>/repo.html" + + llama3_1-405b: + group: models + env: + CM_MODEL: llama3_1-405b + adr: + pytorch: + version_max: 2.5.1 + CM_MODEL: llama3-402b + deps: + - tags: get,generic-python-lib,_package.torchvision + - tags: get,generic-python-lib,_package.torchaudio + - tags: get,generic-python-lib,_package.torch-geometric + - tags: 
get,generic-python-lib,_package.transformers + - tags: get,generic-python-lib,_package.sentencepiece + - tags: get,generic-python-lib,_package.accelerate + - tags: get,generic-python-lib,_package.vllm + env: + CM_GENERIC_PYTHON_PIP_EXTRA: "--upgrade" - tags: get,generic-python-lib,_package.pybind11 - - tags: get,generic-python-lib,_package.PyYAML - - tags: get,generic-python-lib,_package.pydantic - - tags: get,generic-python-lib,_package.igb,_url.git+https://github.com/IllinoisGraphBenchmark/IGB-Datasets.git - - tags: get,generic-python-lib,_package.dgl,_find_links_url.https://data.dgl.ai/wheels/torch-2.1/repo.html - enable_if_env: - CM_MLPERF_DEVICE: - - cpu - - tags: get,generic-python-lib,_package.dgl,_find_links_url.https://data.dgl.ai/wheels/torch-2.1/cu121/repo.html - enable_if_env: - CM_MLPERF_DEVICE: - - gpu + - tags: get,generic-python-lib,_package.pandas + version_max: 2.2.1 + + llama3_1-405b,cuda: + env: + CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>.html" + llama3_1-405b,cpu: + env: + CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" # Target devices cpu: @@ -1252,7 +1335,7 @@ variations: default: true env: CM_MLPERF_DEVICE: cpu - CUDA_VISIBLE_DEVICES: '' + CUDA_VISIBLE_DEVICES: "" USE_CUDA: no USE_GPU: no @@ -1299,8 +1382,7 @@ variations: default: true add_deps_recursive: ml-model: - tags: - _fp32 + tags: _fp32 env: CM_MLPERF_QUANTIZATION: off CM_MLPERF_MODEL_PRECISION: float32 @@ -1310,8 +1392,7 @@ variations: group: precision add_deps_recursive: ml-model-float16: - tags: - _fp16 + tags: _fp16 env: CM_MLPERF_QUANTIZATION: off CM_MLPERF_MODEL_PRECISION: float16 @@ -1321,8 +1402,7 @@ variations: group: precision add_deps_recursive: ml-model-float16: - tags: - _fp16 + tags: _fp16 env: CM_MLPERF_QUANTIZATION: off CM_MLPERF_MODEL_PRECISION: bfloat16 @@ -1334,8 +1414,7 @@ variations: CM_MLPERF_MODEL_PRECISION: int8 add_deps_recursive: ml-model: - tags: - _int8 + tags: _int8 quantized: alias: int8 @@ -1346,11 +1425,9 @@ variations: CM_MLPERF_LOADGEN_MAX_BATCHSIZE: "#" add_deps_recursive: ml-model: - tags: - _batch_size.# + tags: _batch_size.# tvm-model: - tags: - _batch_size.# + tags: _batch_size.# network-sut: group: network @@ -1382,6 +1459,6 @@ variations: loadgen: version: r2.1 env: - CM_RERUN: 'yes' - CM_SKIP_SYS_UTILS: 'yes' - CM_TEST_QUERY_COUNT: '100' + CM_RERUN: "yes" + CM_SKIP_SYS_UTILS: "yes" + CM_TEST_QUERY_COUNT: "100" diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-mlcommons-python/customize.py b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-mlcommons-python/customize.py index 87e09151b..fc1e8450b 100644 --- a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-mlcommons-python/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-mlcommons-python/customize.py @@ -79,7 +79,7 @@ def preprocess(i): str(env['CM_MLPERF_LOADGEN_BATCH_SIZE']) if env.get('CM_MLPERF_LOADGEN_QUERY_COUNT', '') != '' and not env.get('CM_TMP_IGNORE_MLPERF_QUERY_COUNT', False) and ( - env['CM_MLPERF_LOADGEN_MODE'] == 'accuracy' or 'gptj' in env['CM_MODEL'] or 'llama2' in env['CM_MODEL'] or 'mixtral' in env['CM_MODEL']) and env.get('CM_MLPERF_RUN_STYLE', '') != "valid": + env['CM_MLPERF_LOADGEN_MODE'] == 'accuracy' or 'gptj' in env['CM_MODEL'] or 'llama2' in env['CM_MODEL'] or 'mixtral' in env['CM_MODEL'] or 'llama3' in env['CM_MODEL']) and env.get('CM_MLPERF_RUN_STYLE', '') != "valid": env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --count " + \ env['CM_MLPERF_LOADGEN_QUERY_COUNT'] 
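The hunk above widens the explicit query-count override to the llama3 models. A condensed sketch of the resulting predicate, for reading convenience only (the helper name needs_count_flag is illustrative, not part of the patch):

def needs_count_flag(env):
    # True when CM should append "--count <N>" to the LoadGen options.
    llm = any(m in env.get('CM_MODEL', '')
              for m in ('gptj', 'llama2', 'mixtral', 'llama3'))
    return (env.get('CM_MLPERF_LOADGEN_QUERY_COUNT', '') != ''
            and not env.get('CM_TMP_IGNORE_MLPERF_QUERY_COUNT', False)
            and (env.get('CM_MLPERF_LOADGEN_MODE') == 'accuracy' or llm)
            and env.get('CM_MLPERF_RUN_STYLE', '') != 'valid')

# A short llama3 performance test run now also gets an explicit count:
env = {'CM_MODEL': 'llama3_1-405b', 'CM_MLPERF_LOADGEN_MODE': 'performance',
       'CM_MLPERF_LOADGEN_QUERY_COUNT': '10', 'CM_MLPERF_RUN_STYLE': 'test'}
assert needs_count_flag(env)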
@@ -126,17 +126,19 @@ def preprocess(i): scenario_extra_options = '' NUM_THREADS = env['CM_NUM_THREADS'] - if int(NUM_THREADS) > 2 and env['CM_MLPERF_DEVICE'] == "gpu": + if int( + NUM_THREADS) > 2 and env['CM_MLPERF_DEVICE'] == "gpu" and env['CM_MODEL'] != "rgat": NUM_THREADS = "2" # Don't use more than 2 threads when run on GPU - if env['CM_MODEL'] in ['resnet50', 'retinanet', 'stable-diffusion-xl']: + if env['CM_MODEL'] in ['resnet50', 'retinanet', + 'stable-diffusion-xl', 'rgat']: scenario_extra_options += " --threads " + NUM_THREADS ml_model_name = env['CM_MODEL'] if 'CM_MLPERF_USER_CONF' in env: user_conf_path = env['CM_MLPERF_USER_CONF'] x = "" if os_info['platform'] == 'windows' else "'" - if 'llama2-70b' in env['CM_MODEL'] or "mixtral-8x7b" in env["CM_MODEL"]: + if 'llama2-70b' in env['CM_MODEL'] or "mixtral-8x7b" in env["CM_MODEL"] or "llama3" in env["CM_MODEL"]: scenario_extra_options += " --user-conf " + x + user_conf_path + x else: scenario_extra_options += " --user_conf " + x + user_conf_path + x @@ -397,7 +399,9 @@ def get_run_cmd_reference( env['CM_VLLM_SERVER_MODEL_NAME'] = env.get( "CM_VLLM_SERVER_MODEL_NAME") or "NousResearch/Meta-Llama-3-8B-Instruct" # env['CM_MLPERF_INFERENCE_API_SERVER'] = "http://localhost:8000" - cmd += f" --api-server {env['CM_MLPERF_INFERENCE_API_SERVER']} --model-path {env['CM_VLLM_SERVER_MODEL_NAME']} --api-model-name {env['CM_VLLM_SERVER_MODEL_NAME']} --vllm " + cmd += f""" --api-server {env['CM_MLPERF_INFERENCE_API_SERVER']} \ + --model-path {env['CM_VLLM_SERVER_MODEL_NAME']} \ + --api-model-name {env['CM_VLLM_SERVER_MODEL_NAME']} --vllm """ else: cmd += f" --model-path {env['LLAMA2_CHECKPOINT_PATH']}" @@ -496,15 +500,40 @@ def get_run_cmd_reference( # have to add the condition for running in debug mode or real run mode cmd = env['CM_PYTHON_BIN_WITH_PATH'] + " main.py " \ " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + \ - " --dataset-path " + env['CM_IGBH_DATASET_PATH'] + \ - " --device " + device.replace("cuda", "cuda:0") + \ + " --dataset-path " + env['CM_DATASET_IGBH_PATH'] + \ + " --device " + device.replace("cuda", "gpu") + \ env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \ scenario_extra_options + mode_extra_options + \ " --output " + env['CM_MLPERF_OUTPUT_DIR'] + \ ' --dtype ' + dtype_rgat + \ - " --model-path " + env['RGAT_CHECKPOINT_PATH'] + \ - " --mlperf_conf " + \ - os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "mlperf.conf") + " --model-path " + env['RGAT_CHECKPOINT_PATH'] + + if env.get('CM_ACTIVATE_RGAT_IN_MEMORY', '') == "yes": + cmd += " --in-memory " + + elif "llama3" in env['CM_MODEL']: + env['RUN_DIR'] = os.path.join( + env['CM_MLPERF_INFERENCE_SOURCE'], + "language", + "llama3.1-405b") + + if int(env.get('CM_MLPERF_INFERENCE_TP_SIZE', '')) > 1: + env['VLLM_WORKER_MULTIPROC_METHOD'] = "spawn" + + cmd = env['CM_PYTHON_BIN_WITH_PATH'] + " main.py " \ + " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + \ + " --dataset-path " + env['CM_DATASET_LLAMA3_PATH'] + \ + " --output-log-dir " + env['CM_MLPERF_OUTPUT_DIR'] + \ + ' --dtype ' + env['CM_MLPERF_MODEL_PRECISION'] + \ + " --model-path " + env['CM_ML_MODEL_LLAMA3_CHECKPOINT_PATH'] + \ + " --tensor-parallel-size " + env['CM_MLPERF_INFERENCE_TP_SIZE'] + \ + " --vllm " + + if env.get('CM_MLPERF_INFERENCE_NUM_WORKERS', '') != '': + cmd += f" --num-workers {env['CM_MLPERF_INFERENCE_NUM_WORKERS']}" + + cmd = cmd.replace("--count", "--total-sample-count") + cmd = cmd.replace("--max-batchsize", "--batch-size") if env.get('CM_NETWORK_LOADGEN', '') in ["lon", "sut"]: cmd = cmd + " " 
+ "--network " + env['CM_NETWORK_LOADGEN'] diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-nvidia/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-nvidia/_cm.yaml index 0547783f6..5b96c7f65 100644 --- a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-nvidia/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-nvidia/_cm.yaml @@ -1823,7 +1823,7 @@ variations: default_variations: batch-size: batch_size.2048 env: - CM_MLPERF_PERFORMANCE_SAMPLE_COUNT: "2048" + CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT: "2048" a100,sxm,retinanet,offline,run_harness: default_variations: diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-nvidia/customize.py b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-nvidia/customize.py index 0ede381f8..3653c1f9a 100644 --- a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-nvidia/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-nvidia/customize.py @@ -48,14 +48,14 @@ def preprocess(i): make_command = env['MLPERF_NVIDIA_RUN_COMMAND'] if make_command == "prebuild": - cmds.append(f"make prebuild NETWORK_NODE=SUT") + cmds.append(f"""make prebuild NETWORK_NODE=SUT""") if env['CM_MODEL'] == "resnet50": target_data_path = os.path.join( env['MLPERF_SCRATCH_PATH'], 'data', 'imagenet') if not os.path.exists(target_data_path): cmds.append( - f"ln -sf {env['CM_DATASET_IMAGENET_PATH']} {target_data_path}") + f"""ln -sf {env['CM_DATASET_IMAGENET_PATH']} {target_data_path}""") model_path = os.path.join( env['MLPERF_SCRATCH_PATH'], @@ -64,11 +64,11 @@ def preprocess(i): 'resnet50_v1.onnx') if not os.path.exists(os.path.dirname(model_path)): - cmds.append(f"mkdir -p {os.path.dirname(model_path)}") + cmds.append(f"""mkdir -p {os.path.dirname(model_path)}""") if not os.path.exists(model_path): cmds.append( - f"ln -sf {env['CM_ML_MODEL_FILE_WITH_PATH']} {model_path}") + f"""ln -sf {env['CM_ML_MODEL_FILE_WITH_PATH']} {model_path}""") model_name = "resnet50" elif "bert" in env['CM_MODEL']: @@ -94,17 +94,17 @@ def preprocess(i): 'vocab.txt') if not os.path.exists(os.path.dirname(fp32_model_path)): - cmds.append(f"mkdir -p {os.path.dirname(fp32_model_path)}") + cmds.append(f"""mkdir -p {os.path.dirname(fp32_model_path)}""") if not os.path.exists(fp32_model_path): cmds.append( - f"ln -sf {env['CM_ML_MODEL_BERT_LARGE_FP32_PATH']} {fp32_model_path}") + f"""cp -r --remove-destination {env['CM_ML_MODEL_BERT_LARGE_FP32_PATH']} {fp32_model_path}""") if not os.path.exists(int8_model_path): cmds.append( - f"ln -sf {env['CM_ML_MODEL_BERT_LARGE_INT8_PATH']} {int8_model_path}") + f"""cp -r --remove-destination {env['CM_ML_MODEL_BERT_LARGE_INT8_PATH']} {int8_model_path}""") if not os.path.exists(vocab_path): cmds.append( - f"ln -sf {env['CM_ML_MODEL_BERT_VOCAB_FILE_WITH_PATH']} {vocab_path}") + f"""cp -r --remove-destination {env['CM_ML_MODEL_BERT_VOCAB_FILE_WITH_PATH']} {vocab_path}""") model_name = "bert" model_path = fp32_model_path @@ -123,9 +123,9 @@ def preprocess(i): # cmds.append("make download_data BENCHMARKS='stable-diffusion-xl'") env['CM_REQUIRE_COCO2014_DOWNLOAD'] = 'yes' cmds.append( - f"cp -r \\$CM_DATASET_PATH_ROOT/captions/captions.tsv {target_data_path}/captions_5k_final.tsv") + f"""cp -r \\$CM_DATASET_PATH_ROOT/captions/captions.tsv {target_data_path}/captions_5k_final.tsv""") cmds.append( - f"cp -r \\$CM_DATASET_PATH_ROOT/latents/latents.pt {target_data_path}/latents.pt") + f"""cp -r \\$CM_DATASET_PATH_ROOT/latents/latents.pt {target_data_path}/latents.pt""") fp16_model_path = os.path.join( 
env['MLPERF_SCRATCH_PATH'], 'models', @@ -135,7 +135,7 @@ def preprocess(i): 'stable_diffusion_fp16') if not os.path.exists(os.path.dirname(fp16_model_path)): - cmds.append(f"mkdir -p {os.path.dirname(fp16_model_path)}") + cmds.append(f"""mkdir -p {os.path.dirname(fp16_model_path)}""") if not os.path.exists(fp16_model_path): if os.path.islink(fp16_model_path): @@ -698,11 +698,15 @@ def preprocess(i): '') # will be ignored during build engine if "stable-diffusion" in env["CM_MODEL"]: - extra_build_engine_options_string += f" --model_path {os.path.join(env['MLPERF_SCRATCH_PATH'], 'models', 'SDXL/')}" + extra_build_engine_options_string += f""" --model_path { + os.path.join( + env['MLPERF_SCRATCH_PATH'], + 'models', + 'SDXL/')}""" run_config += " --no_audit_verify" - cmds.append(f"make {make_command} RUN_ARGS=' --benchmarks={model_name} --scenarios={scenario} {test_mode_string} {run_config} {extra_build_engine_options_string} {extra_run_options_string}'") + cmds.append(f"""make {make_command} RUN_ARGS=' --benchmarks={model_name} --scenarios={scenario} {test_mode_string} {run_config} {extra_build_engine_options_string} {extra_run_options_string}'""") run_cmd = " && ".join(cmds) env['CM_MLPERF_RUN_CMD'] = run_cmd diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-qualcomm/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-qualcomm/_cm.yaml index 8de84ac08..5e3de4302 100644 --- a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-qualcomm/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-qualcomm/_cm.yaml @@ -49,7 +49,7 @@ input_mapping: mlperf_conf: CM_MLPERF_CONF mode: CM_MLPERF_LOADGEN_MODE output_dir: CM_MLPERF_OUTPUT_DIR - performance_sample_count: CM_MLPERF_PERFORMANCE_SAMPLE_COUNT + performance_sample_count: CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT scenario: CM_MLPERF_LOADGEN_SCENARIO user_conf: CM_MLPERF_USER_CONF devices: CM_QAIC_DEVICES diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-redhat/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-redhat/_cm.yaml index 75f460f37..2c7011bd5 100644 --- a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-redhat/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-redhat/_cm.yaml @@ -38,7 +38,7 @@ input_mapping: mlperf_conf: CM_MLPERF_CONF mode: CM_MLPERF_LOADGEN_MODE output_dir: CM_MLPERF_OUTPUT_DIR - performance_sample_count: CM_MLPERF_PERFORMANCE_SAMPLE_COUNT + performance_sample_count: CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT scenario: CM_MLPERF_LOADGEN_SCENARIO user_conf: CM_MLPERF_USER_CONF skip_preprocess: CM_SKIP_PREPROCESS_DATASET diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference/_cm.yaml index 6e95a0082..7596b30ef 100644 --- a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference/_cm.yaml @@ -66,6 +66,8 @@ input_mapping: tp_size: CM_NVIDIA_TP_SIZE use_dataset_from_host: CM_USE_DATASET_FROM_HOST +predeps: False + # Duplicate CM environment variables to the ones used in native apps env_key_mappings: CM_HOST_: HOST_ @@ -219,6 +221,8 @@ variations: tags: _int32 cnndm-accuracy-script: tags: _int32 + llama3_1-405b-accuracy-script: + tags: _int32 env: CM_MLPERF_PYTHON: 'yes' CM_MLPERF_IMPLEMENTATION: mlcommons_python @@ -270,6 +274,10 @@ variations: default_variations: backend: pytorch + reference,llama3_1-405b: + default_variations: + backend: pytorch + reference,mixtral-8x7b: default_variations: 
backend: pytorch @@ -336,12 +344,16 @@ variations: - x86_64 docker: base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public + env: + CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl' - skip_if_env: CM_HOST_PLATFORM_FLAVOR: - x86_64 docker: base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.1-cuda12.4-pytorch24.04-ubuntu22.04-aarch64-GraceHopper-release + env: + CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp310-cp310-linux_aarch64.whl' @@ -403,7 +415,7 @@ variations: nvidia-original: docker: interactive: True - extra_run_args: ' --ulimit memlock=-1 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' + extra_run_args: ' --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v3.1-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-l4-public os: "ubuntu" os_version: "20.04" @@ -428,7 +440,7 @@ variations: CM_HOST_OS_VERSION: - 20.04 docker: - extra_run_args: ' --runtime=nvidia --ulimit memlock=-1 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' + extra_run_args: ' --runtime=nvidia --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' default_variations: backend: tensorrt @@ -765,6 +777,63 @@ variations: env: CM_MODEL: rgat + posthook_deps: + - enable_if_env: + CM_MLPERF_LOADGEN_MODE: + - accuracy + - all + CM_MLPERF_ACCURACY_RESULTS_DIR: + - 'on' + skip_if_env: + CM_MLPERF_IMPLEMENTATION: + - nvidia + names: + - mlperf-accuracy-script + - 3d-unet-accuracy-script + tags: run,accuracy,mlperf,_igbh + docker: + deps: + - tags: get,dataset,igbh + enable_if_env: + CM_USE_DATASET_FROM_HOST: + - 'yes' + names: + - igbh-original + - igbh-dataset + + llama3_1-405b: + group: + model + add_deps_recursive: + mlperf-inference-implementation: + tags: _llama3_1-405b + env: + CM_MODEL: + llama3_1-405b + posthook_deps: + - enable_if_env: + CM_MLPERF_LOADGEN_MODE: + - accuracy + - all + CM_MLPERF_ACCURACY_RESULTS_DIR: + - 'on' + skip_if_env: + CM_MLPERF_IMPLEMENTATION: + - nvidia + names: + - mlperf-accuracy-script + - llama3_1-405b-accuracy-script + tags: run,accuracy,mlperf,_dataset_llama3 + docker: + deps: + - tags: get,ml-model,llama3 + enable_if_env: + CM_USE_DATASET_FROM_HOST: + - 'yes' + names: + - llama3_1-405b + - llama3-405b + sdxl: group: @@ -1602,7 +1671,7 @@ variations: CM_SKIP_SYS_UTILS: 'yes' CM_REGENERATE_MEASURE_FILES: 'yes' env: - CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3-cp38-cp38-linux_x86_64.whl' + CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl' #uses public code for inference v4.1 @@ -1621,8 +1690,6 @@ variations: default_env: CM_SKIP_SYS_UTILS: 'yes' CM_REGENERATE_MEASURE_FILES: 'yes' - env: - CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl' r4.1_default: group: @@ -1643,6 +1710,27 @@ variations: CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl' CM_MLPERF_INFERENCE_VERSION: '4.1' + r5.0-dev_default: + group: + reproducibility + add_deps_recursive: + nvidia-inference-common-code: + version: r4.1 + tags: _mlcommons + nvidia-inference-server: + version: r4.1 + tags: _mlcommons + intel-harness: + tags: _v4.1 + inference-src: + version: r5.0 + default_env: + CM_SKIP_SYS_UTILS: 'yes' + 
CM_REGENERATE_MEASURE_FILES: 'yes' + env: + CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl' + + invalid_variation_combinations: - - retinanet @@ -1729,13 +1817,32 @@ input_description: debug: desc: "Debug MLPerf script" -gui: - title: "CM GUI for the MLPerf inference benchmark" +update_meta_if_env: + - enable_if_env: + CM_CONTAINER_TOOL: + - podman + # podman maps the host userid to the root user inside the container + docker: + use_host_group_id: False + use_host_user_id: False + pass_user_group: False #useful if docker is run by a different user from the one who built it and under the same group + default_env: + CM_DOCKER_USE_DEFAULT_USER: 'yes' + - skip_if_env: + CM_CONTAINER_TOOL: + - podman + docker: + use_host_group_id: True + use_host_user_id: True + pass_user_group: True #useful if docker is run by a different user from the one who built it and under the same group + - enable_if_env: + CM_HOST_OS_TYPE: + - linux + adr: + compiler: + tags: gcc docker: - use_host_group_id: True - use_host_user_id: True - pass_user_group: True #useful if docker is run by a different user fromt he one who built it and under the same group deps: - tags: get,mlperf,inference,results,dir,local names: @@ -1751,7 +1858,6 @@ docker: pre_run_cmds: #- cm pull repo && cm run script --tags=get,git,repo,_repo.https://github.com/GATEOverflow/inference_results_v4.0.git --update - cm pull repo - - cm rm cache --tags=inference,src -f mounts: - "${{ CM_DATASET_IMAGENET_PATH }}:${{ CM_DATASET_IMAGENET_PATH }}" - "${{ CM_DATASET_OPENIMAGES_PATH }}:${{ CM_DATASET_OPENIMAGES_PATH }}" @@ -1766,13 +1872,15 @@ docker: - "${{ CM_NVIDIA_LLAMA_DATASET_FILE_PATH }}:${{ CM_NVIDIA_LLAMA_DATASET_FILE_PATH }}" - "${{ SDXL_CHECKPOINT_PATH }}:${{ SDXL_CHECKPOINT_PATH }}" - "${{ CM_DATASET_KITS19_PREPROCESSED_PATH }}:${{ CM_DATASET_KITS19_PREPROCESSED_PATH }}" + - "${{ CM_DATASET_IGBH_PATH }}:${{ CM_DATASET_IGBH_PATH }}" + - "${{ CM_ML_MODEL_RGAT_CHECKPOINT_PATH }}:${{ CM_ML_MODEL_RGAT_CHECKPOINT_PATH }}" skip_run_cmd: 'no' shm_size: '32gb' interactive: True - extra_run_args: ' --dns 8.8.8.8 --dns 8.8.4.4 --ulimit memlock=-1 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' + extra_run_args: ' --dns 8.8.8.8 --dns 8.8.4.4 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' os: ubuntu - cm_repo: mlcommons@cm4mlops - cm_repo_branch: mlperf-inference + cm_repo: mlcommons@mlperf-automations + cm_repo_branch: dev real_run: False os_version: '22.04' docker_input_mapping: diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference/customize.py b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference/customize.py index 07fb7cb4e..30bbf0732 100644 --- a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference/customize.py @@ -129,6 +129,8 @@ def postprocess(i): if mode == "accuracy" or mode == "compliance" and env[ 'CM_MLPERF_LOADGEN_COMPLIANCE_TEST'] == "TEST01": + out_baseline_accuracy_string = f"""> {os.path.join(output_dir, "accuracy", "baseline_accuracy.txt")} """ + out_compliance_accuracy_string = f"""> {os.path.join(output_dir, "accuracy", "compliance_accuracy.txt")} """ if model == "resnet50": accuracy_filename = "accuracy-imagenet.py" accuracy_filepath = os.path.join(env['CM_MLPERF_INFERENCE_CLASSIFICATION_AND_DETECTION_PATH'], "tools", @@ -158,6 +160,17 @@ def postprocess(i): datatype_option = " --output_dtype " + 
\ env['CM_SQUAD_ACCURACY_DTYPE'] + elif 'rgat' in model: + accuracy_filename = "accuracy_igbh.py" + accuracy_filepath = os.path.join( + env['CM_MLPERF_INFERENCE_RGAT_PATH'], "tools", accuracy_filename) + dataset_args = " --dataset-path '" + env['CM_DATASET_IGBH_PATH'] + "' --dataset-size '" + \ + env['CM_DATASET_IGBH_SIZE'] + "'" + accuracy_log_file_option_name = " --mlperf-accuracy-file " + datatype_option = "" + out_baseline_accuracy_string = f""" --output-file {os.path.join(output_dir, "accuracy", "baseline_accuracy.txt")} """ + out_compliance_accuracy_string = f""" --output-file {os.path.join(output_dir, "accuracy", "compliance_accuracy.txt")} """ + elif 'stable-diffusion-xl' in model: pass # No compliance check for now elif 'gpt' in model: @@ -367,7 +380,7 @@ def postprocess(i): host_info['system_name'] = env['CM_HOST_SYSTEM_NAME'] # Check CM automation repository - repo_name = 'mlcommons@cm4mlops' + repo_name = 'mlcommons@mlperf-automations' repo_hash = '' r = cm.access({'action': 'find', 'automation': 'repo', 'artifact': 'mlcommons@cm4mlops,9e97bb72b0474657'}) @@ -401,9 +414,7 @@ def postprocess(i): cmd = "" xcmd = "" - readme_init = "This experiment is generated using the [MLCommons Collective Mind automation framework (CM)](https://github.com/mlcommons/cm4mlops).\n\n" - - readme_init += "*Check [CM MLPerf docs](https://docs.mlcommons.org/inference) for more details.*\n\n" + readme_init = "*Check [CM MLPerf docs](https://docs.mlcommons.org/inference) for more details.*\n\n" readme_body = "## Host platform\n\n* OS version: {}\n* CPU version: {}\n* Python version: {}\n* MLCommons CM version: {}\n\n".format(platform.platform(), platform.processor(), sys.version, cm.__version__) @@ -513,6 +524,7 @@ def postprocess(i): cmd = "cd " + TEST01_DIR + " && bash " + SCRIPT_PATH + " " + os.path.join(ACCURACY_DIR, "mlperf_log_accuracy.json") + " " + \ os.path.join(COMPLIANCE_DIR, "mlperf_log_accuracy.json") env['CMD'] = cmd + print(cmd) r = automation.run_native_script( {'run_script_input': run_script_input, 'env': env, 'script_name': 'verify_accuracy'}) if r['return'] > 0: @@ -527,9 +539,11 @@ def postprocess(i): print("\nDeterministic TEST01 failed... 
Trying with non-determinism.\n") # #Normal test failed, trying the check with non-determinism + baseline_accuracy_file = os.path.join( + TEST01_DIR, "mlperf_log_accuracy_baseline.json") CMD = "cd " + ACCURACY_DIR + " && " + env['CM_PYTHON_BIN_WITH_PATH'] + ' ' + accuracy_filepath + accuracy_log_file_option_name + \ - os.path.join(TEST01_DIR, "mlperf_log_accuracy_baseline.json") + dataset_args + datatype_option + " > " + \ - os.path.join(OUTPUT_DIR, "baseline_accuracy.txt") + baseline_accuracy_file + ' ' + dataset_args + \ + datatype_option + out_baseline_accuracy_string env['CMD'] = CMD r = automation.run_native_script( @@ -537,9 +551,13 @@ def postprocess(i): if r['return'] > 0: return r + if os.stat(baseline_accuracy_file).st_size == 0: + return {'return': 1, + 'error': f"{baseline_accuracy_file} is empty"} + CMD = "cd " + ACCURACY_DIR + " && " + env['CM_PYTHON_BIN_WITH_PATH'] + ' ' + accuracy_filepath + accuracy_log_file_option_name + \ - os.path.join(TEST01_DIR, "mlperf_log_accuracy.json") + dataset_args + datatype_option + " > " + \ - os.path.join(OUTPUT_DIR, "compliance_accuracy.txt") + os.path.join(TEST01_DIR, "mlperf_log_accuracy.json") + \ + dataset_args + datatype_option + out_compliance_accuracy_string env['CMD'] = CMD r = automation.run_native_script( diff --git a/cmx4mlops/cmx4mlops/repo/script/build-docker-image/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/build-docker-image/_cm.yaml index a9dc8cb67..8fd7c2571 100644 --- a/cmx4mlops/cmx4mlops/repo/script/build-docker-image/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/build-docker-image/_cm.yaml @@ -38,6 +38,9 @@ input_mapping: new_env_keys: - CM_DOCKER_* +deps: + - tags: get,docker + prehook_deps: - enable_if_env: CM_BUILD_DOCKERFILE: diff --git a/cmx4mlops/cmx4mlops/repo/script/build-docker-image/customize.py b/cmx4mlops/cmx4mlops/repo/script/build-docker-image/customize.py index 4746e98c3..4f2f1ac9e 100644 --- a/cmx4mlops/cmx4mlops/repo/script/build-docker-image/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/build-docker-image/customize.py @@ -48,14 +48,15 @@ def preprocess(i): # env['CM_BUILD_DOCKERFILE'] = "no" # if env.get("CM_DOCKER_IMAGE_REPO", "") == '': - env['CM_DOCKER_IMAGE_REPO'] = "local" + env['CM_DOCKER_IMAGE_REPO'] = "localhost/local" docker_image_name = env.get('CM_DOCKER_IMAGE_NAME', '') if docker_image_name == '': docker_image_name = "cm-script-" + \ env.get('CM_DOCKER_RUN_SCRIPT_TAGS', '').replace( ',', '-').replace('_', '-') - env['CM_DOCKER_IMAGE_NAME'] = docker_image_name + + env['CM_DOCKER_IMAGE_NAME'] = docker_image_name.lower() if env.get("CM_DOCKER_IMAGE_TAG", "") == '': env['CM_DOCKER_IMAGE_TAG'] = "latest" @@ -76,7 +77,8 @@ def preprocess(i): # Prepare CMD to build image XCMD = [ - 'docker build ' + env.get('CM_DOCKER_CACHE_ARG', ''), + f'{env["CM_CONTAINER_TOOL"]} build ' + + env.get('CM_DOCKER_CACHE_ARG', ''), ' ' + build_args, ' -f "' + dockerfile_path + '"', ' -t "' + image_name, diff --git a/cmx4mlops/cmx4mlops/repo/script/build-dockerfile/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/build-dockerfile/_cm.yaml index 7535311ea..9f91c0775 100644 --- a/cmx4mlops/cmx4mlops/repo/script/build-dockerfile/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/build-dockerfile/_cm.yaml @@ -19,7 +19,7 @@ default_env: ' CM_DOCKER_OS: ubuntu CM_DOCKER_NOT_PULL_UPDATE: False - CM_MLOPS_REPO_BRANCH: mlperf-inference + CM_MLOPS_REPO_BRANCH: dev input_mapping: build: CM_BUILD_DOCKER_IMAGE @@ -57,6 +57,11 @@ input_mapping: new_env_keys: - CM_DOCKERFILE_* +deps: + - tags: get,docker + names: + - docker + post_deps: - 
enable_if_env: CM_BUILD_DOCKER_IMAGE: diff --git a/cmx4mlops/cmx4mlops/repo/script/build-dockerfile/customize.py b/cmx4mlops/cmx4mlops/repo/script/build-dockerfile/customize.py index 3fdd1613e..e50af9a53 100644 --- a/cmx4mlops/cmx4mlops/repo/script/build-dockerfile/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/build-dockerfile/customize.py @@ -142,7 +142,7 @@ def preprocess(i): print( f"Converted repo format from {env['CM_MLOPS_REPO']} to {cm_mlops_repo}") else: - cm_mlops_repo = "mlcommons@cm4mlops" + cm_mlops_repo = "mlcommons@mlperf-automations" cm_mlops_repo_branch_string = f" --branch={env['CM_MLOPS_REPO_BRANCH']}" @@ -183,7 +183,7 @@ def preprocess(i): shell = get_value(env, config, 'SHELL', 'CM_DOCKER_IMAGE_SHELL') if shell: - f.write('SHELL ' + shell + EOL) + # f.write('SHELL ' + shell + EOL) f.write(EOL) for arg in config['ARGS_DEFAULT']: @@ -261,7 +261,12 @@ def preprocess(i): docker_user = get_value(env, config, 'USER', 'CM_DOCKER_USER') docker_group = get_value(env, config, 'GROUP', 'CM_DOCKER_GROUP') - if docker_user: + if env.get('CM_CONTAINER_TOOL', '') == 'podman' and env.get( + 'CM_DOCKER_USE_DEFAULT_USER', '') == '': + env['CM_DOCKER_USE_DEFAULT_USER'] = 'yes' + + if docker_user and str(env.get('CM_DOCKER_USE_DEFAULT_USER', '')).lower() not in [ + "yes", "1", "true"]: f.write('RUN groupadd -g $GID -o ' + docker_group + EOL) @@ -277,14 +282,20 @@ def preprocess(i): ' ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers' + EOL) f.write('USER ' + docker_user + ":" + docker_group + EOL) + f.write('ENV HOME=/home/cmuser' + EOL) + + else: + f.write('ENV HOME=/root' + EOL) dockerfile_env = env.get('CM_DOCKERFILE_ENV', {}) dockerfile_env_input_string = "" for docker_env_key in dockerfile_env: dockerfile_env_input_string = dockerfile_env_input_string + " --env." + \ docker_env_key + "=" + str(dockerfile_env[docker_env_key]) + workdir = get_value(env, config, 'WORKDIR', 'CM_DOCKER_WORKDIR') - if workdir: + if workdir and ("/home/cmuser" not in workdir or str(env.get('CM_DOCKER_USE_DEFAULT_USER', '')).lower() not in [ + "yes", "1", "true"]): f.write('WORKDIR ' + workdir + EOL) f.write(EOL + '# Install python packages' + EOL) @@ -292,9 +303,10 @@ def preprocess(i): docker_use_virtual_python = env.get('CM_DOCKER_USE_VIRTUAL_PYTHON', "yes") if str(docker_use_virtual_python).lower() not in ["no", "0", "false"]: - f.write('RUN {} -m venv /home/cmuser/venv/cm'.format(python) + " " + EOL) - f.write('ENV PATH="/home/cmuser/venv/cm/bin:$PATH"' + EOL) + f.write('RUN {} -m venv $HOME/venv/cm'.format(python) + " " + EOL) + f.write('ENV PATH="$HOME/venv/cm/bin:$PATH"' + EOL) # f.write('RUN . 
/opt/venv/cm/bin/activate' + EOL) + f.write( 'RUN {} -m pip install '.format(python) + " ".join( @@ -310,7 +322,7 @@ def preprocess(i): f.write(EOL + '# Download CM repo for scripts' + EOL) if use_copy_repo: - docker_repo_dest = "/home/cmuser/CM/repos/mlcommons@cm4mlops" + docker_repo_dest = "$HOME/CM/repos/mlcommons@mlperf-automations" f.write( f'COPY --chown=cmuser:cm {relative_repo_path} {docker_repo_dest}' + EOL) @@ -390,6 +402,26 @@ def preprocess(i): if run_cmd_extra != '': x += ' ' + run_cmd_extra + if env.get('CM_DOCKER_RUN_SCRIPT_TAGS', '') != '' and str(env.get( + 'CM_DOCKER_ADD_DEPENDENT_SCRIPTS_RUN_COMMANDS', '')).lower() in ["yes", "1", "true"]: + cm_input = {'action': 'run', + 'automation': 'script', + 'tags': f"""{env['CM_DOCKER_RUN_SCRIPT_TAGS']}""", + 'print_deps': True, + 'quiet': True, + 'silent': True, + 'fake_run': True, + 'fake_deps': True + } + r = self_module.cmind.access(cm_input) + if r['return'] > 0: + return r + print_deps = r['new_state']['print_deps'] + fake_run_str = " --fake_run" if env.get('CM_DOCKER_FAKE_DEPS') else "" + cmds = ["RUN " + dep for dep in print_deps] + for cmd in cmds: + f.write(cmd + fake_run_str + EOL) + f.write(x + EOL) # fake_run to install the dependent scripts and caching them diff --git a/cmx4mlops/cmx4mlops/repo/script/draw-graph-from-json-data/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/draw-graph-from-json-data/_cm.yaml index 4cea12c42..eb1d1a157 100644 --- a/cmx4mlops/cmx4mlops/repo/script/draw-graph-from-json-data/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/draw-graph-from-json-data/_cm.yaml @@ -19,3 +19,4 @@ deps: - python3 - tags: get,generic-python-lib,_package.networkx - tags: get,generic-python-lib,_package.matplotlib + - tags: get,generic-python-lib,_package.typing_extensions diff --git a/cmx4mlops/cmx4mlops/repo/script/generate-mlperf-inference-submission/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/generate-mlperf-inference-submission/_cm.yaml index 32003a1b3..013997df9 100644 --- a/cmx4mlops/cmx4mlops/repo/script/generate-mlperf-inference-submission/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/generate-mlperf-inference-submission/_cm.yaml @@ -7,6 +7,7 @@ default_env: CM_MLPERF_RUN_STYLE: valid CM_MLPERF_SUBMISSION_DIR_SHARED: 'yes' CM_RUN_MLPERF_ACCURACY: 'on' +predeps: False deps: - names: - python @@ -31,8 +32,8 @@ deps: - 'on' tags: get,mlperf,submission,dir docker: - cm_repo: mlcommons@cm4mlops - cm_repo_branch: mlperf-inference + cm_repo: mlcommons@mlperf-automations + cm_repo_branch: dev deps: - names: get-mlperf-inference-results-dir skip_if_env: @@ -68,6 +69,7 @@ input_mapping: device: CM_MLPERF_DEVICE division: CM_MLPERF_SUBMISSION_DIVISION duplicate: CM_MLPERF_DUPLICATE_SCENARIO_RESULTS + extra_checker_args: CM_MLPERF_SUBMISSION_CHECKER_EXTRA_ARG hw_name: CM_HW_NAME hw_notes_extra: CM_MLPERF_SUT_HW_NOTES_EXTRA infer_scenario_results: CM_MLPERF_DUPLICATE_SCENARIO_RESULTS @@ -84,6 +86,7 @@ input_mapping: sw_notes_extra: CM_MLPERF_SUT_SW_NOTES_EXTRA tar: CM_TAR_SUBMISSION_DIR get_platform_details: CM_GET_PLATFORM_DETAILS + version: CM_MLPERF_SUBMISSION_CHECKER_VERSION post_deps: - enable_if_env: CM_RUN_MLPERF_ACCURACY: @@ -99,9 +102,9 @@ post_deps: - 'yes' - true tags: preprocess,mlperf,submission -- enable_if_env: +- skip_if_env: CM_RUN_SUBMISSION_CHECKER: - - 'yes' + - 'no' names: - mlperf-inference-submission-checker - submission-checker diff --git a/cmx4mlops/cmx4mlops/repo/script/generate-mlperf-inference-submission/customize.py 
b/cmx4mlops/cmx4mlops/repo/script/generate-mlperf-inference-submission/customize.py index e6e350728..02161eb20 100644 --- a/cmx4mlops/cmx4mlops/repo/script/generate-mlperf-inference-submission/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/generate-mlperf-inference-submission/customize.py @@ -171,11 +171,15 @@ def generate_submission(env, state, inp, submission_division): print('* MLPerf inference submitter: {}'.format(submitter)) if env.get('CM_MLPERF_SUT_SW_NOTES_EXTRA', '') != '': - sw_notes = f"{system_meta_tmp['sw_notes']} {env['CM_MLPERF_SUT_SW_NOTES_EXTRA']}" + sw_notes = f"""{ + system_meta_tmp['sw_notes']} { + env['CM_MLPERF_SUT_SW_NOTES_EXTRA']}""" system_meta_tmp['sw_notes'] = sw_notes if env.get('CM_MLPERF_SUT_HW_NOTES_EXTRA', '') != '': - hw_notes = f"{system_meta_tmp['hw_notes']} {env['CM_MLPERF_SUT_HW_NOTES_EXTRA']}" + hw_notes = f"""{ + system_meta_tmp['hw_notes']} { + env['CM_MLPERF_SUT_HW_NOTES_EXTRA']}""" system_meta_tmp['hw_notes'] = hw_notes path_submission = os.path.join(path_submission_division, submitter) @@ -197,7 +201,7 @@ def generate_submission(env, state, inp, submission_division): result_path, 'system_meta.json') # checks for json file containing system meta sut_info = { - "hardware_name": None, + "system_name": None, "implementation": None, "device": None, "framework": None, @@ -283,7 +287,7 @@ def generate_submission(env, state, inp, submission_division): {model: returned_model_name}) if check_dict_filled(sut_info.keys(), sut_info): - system = sut_info["hardware_name"] + system = env.get('CM_HW_NAME', sut_info["system_name"]) implementation = sut_info["implementation"] device = sut_info["device"] framework = sut_info["framework"].replace(" ", "_") @@ -308,6 +312,10 @@ def generate_submission(env, state, inp, submission_division): system_path = os.path.join(path_submission, "systems") submission_system_path = system_path + if not os.path.isdir(submission_path): + os.makedirs(submission_path) + if not os.path.isdir(measurement_path): + os.makedirs(measurement_path) if not os.path.isdir(submission_system_path): os.makedirs(submission_system_path) system_file = os.path.join(submission_system_path, sub_res + ".json") @@ -585,8 +593,11 @@ def generate_submission(env, state, inp, submission_division): os.makedirs(target) for log_file in os.listdir( compliance_accuracy_run_path): - if log_file.startswith( - "mlperf_log_accuracy.json") or log_file.endswith("accuracy.txt"): + log_file_name = os.path.basename( + log_file) + # print(os.path.join(compliance_accuracy_run_path, log_file)) + if log_file_name in [ + "mlperf_log_accuracy.json", "accuracy.txt", "baseline_accuracy.txt", "compliance_accuracy.txt"]: shutil.copy( os.path.join( compliance_accuracy_run_path, log_file), os.path.join( @@ -735,6 +746,8 @@ def postprocess(i): # submission_generation function if env.get('CM_MLPERF_SUBMISSION_DIVISION', '') == '': r = generate_submission(env, state, inp, submission_division="") + if r['return'] > 0: + return r else: for submission_division in submission_divisions: r = generate_submission(env, state, inp, submission_division) diff --git a/cmx4mlops/cmx4mlops/repo/script/generate-mlperf-inference-user-conf/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/generate-mlperf-inference-user-conf/_cm.yaml index 99fc35995..c19bdcba3 100644 --- a/cmx4mlops/cmx4mlops/repo/script/generate-mlperf-inference-user-conf/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/generate-mlperf-inference-user-conf/_cm.yaml @@ -49,7 +49,7 @@ input_mapping: server_target_qps: 
CM_MLPERF_LOADGEN_SERVER_TARGET_QPS singlestream_target_latency: CM_MLPERF_LOADGEN_SINGLESTREAM_TARGET_LATENCY multistream_target_latency: CM_MLPERF_LOADGEN_MULTISTREAM_TARGET_LATENCY - performance_sample_count: CM_MLPERF_PERFORMANCE_SAMPLE_COUNT + performance_sample_count: CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT # Env keys which are exposed to higher level scripts new_env_keys: diff --git a/cmx4mlops/cmx4mlops/repo/script/generate-mlperf-inference-user-conf/customize.py b/cmx4mlops/cmx4mlops/repo/script/generate-mlperf-inference-user-conf/customize.py index 6fc9a3bb4..fc31f0c1d 100644 --- a/cmx4mlops/cmx4mlops/repo/script/generate-mlperf-inference-user-conf/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/generate-mlperf-inference-user-conf/customize.py @@ -228,8 +228,8 @@ def preprocess(i): user_conf += ml_model_name + "." + scenario + \ "." + metric + " = " + str(metric_value) + "\n" - if env.get('CM_MLPERF_PERFORMANCE_SAMPLE_COUNT', '') != '': - performance_sample_count = env['CM_MLPERF_PERFORMANCE_SAMPLE_COUNT'] + if env.get('CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT', '') != '': + performance_sample_count = env['CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT'] user_conf += ml_model_name + ".*.performance_sample_count_override = " + \ performance_sample_count + "\n" diff --git a/cmx4mlops/cmx4mlops/repo/script/get-cudnn/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-cudnn/_cm.yaml index b01506f6d..fa5ccd2c7 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-cudnn/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-cudnn/_cm.yaml @@ -19,6 +19,7 @@ default_env: deps: - tags: detect,os +- tags: detect,sudo - names: - cuda skip_if_env: diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-coco2014/customize.py b/cmx4mlops/cmx4mlops/repo/script/get-dataset-coco2014/customize.py index 0349003fd..43511c042 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-dataset-coco2014/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-coco2014/customize.py @@ -35,17 +35,17 @@ def postprocess(i): env = i['env'] if env.get('CM_GENERATE_SAMPLE_ID', '') == "yes": env['CM_COCO2014_SAMPLE_ID_PATH'] = os.path.join( - os.getcwd(), 'install', 'sample_ids.txt') + os.getcwd(), 'sample_ids.txt') print(env['CM_COCO2014_SAMPLE_ID_PATH']) if env.get('CM_DATASET_CALIBRATION', '') == "no": - env['CM_DATASET_PATH_ROOT'] = os.path.join(os.getcwd(), 'install') + env['CM_DATASET_PATH_ROOT'] = os.getcwd() # env['CM_DATASET_PATH'] = os.path.join(os.getcwd(), 'install', 'validation', 'data') env['CM_DATASET_CAPTIONS_DIR_PATH'] = os.path.join( - os.getcwd(), 'install', 'captions') + os.getcwd(), 'captions') env['CM_DATASET_LATENTS_DIR_PATH'] = os.path.join( - os.getcwd(), 'install', 'latents') + os.getcwd(), 'latents') else: env['CM_CALIBRATION_DATASET_PATH'] = os.path.join( - os.getcwd(), 'install', 'calibration', 'data') + os.getcwd(), 'calibration', 'data') return {'return': 0} diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-coco2014/run.sh b/cmx4mlops/cmx4mlops/repo/script/get-dataset-coco2014/run.sh index 61b9ffe52..3685b161c 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-dataset-coco2014/run.sh +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-coco2014/run.sh @@ -5,8 +5,7 @@ python3() { export -f python3 CUR=${PWD} -mkdir -p install -INSTALL_DIR=${CUR}/install +INSTALL_DIR=${CUR} cd ${CM_RUN_DIR} diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/COPYRIGHT.md b/cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/COPYRIGHT.md new file mode 100644 index 
000000000..a059b0c49
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/COPYRIGHT.md
@@ -0,0 +1,9 @@
+# Copyright Notice
+
+© 2024-2025 MLCommons. All Rights Reserved.
+
+This file is licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. A copy of the License can be obtained at:
+
+[Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is provided on an "AS IS" basis, without warranties or conditions of any kind, either express or implied. Please refer to the License for the specific language governing permissions and limitations under the License.
diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/README-extra.md b/cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/README-extra.md
new file mode 100644
index 000000000..0bb16ad46
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/README-extra.md
@@ -0,0 +1,78 @@
+Examples:
+
+### Check flags
+
+```bash
+cm run script --tags=get,raw,dataset,mlcommons-cognata --help
+```
+
+### Import an already downloaded dataset
+
+Note that this automation will attempt to install the aria2 tool via sudo apt on Ubuntu.
+
+```bash
+cm run script --tags=get,raw,dataset,mlcommons-cognata --import=${HOME}/datasets/cognata -j
+cm run script --tags=get,raw,dataset,mlcommons-cognata --import=${HOME}/datasets/cognata -j --private_url="{ADD PRIVATE URL FOR COGNATA}" # add the private URL for full automation
+cm run script --tags=get,raw,dataset,mlcommons-cognata --import=%userprofile%\datasets\cognata -j
+cm run script --tags=get,raw,dataset,mlcommons-cognata --import=D:\Work2\cognata -j
+```
+
+### Download dataset to CM cache
+
+```bash
+cm run script --tags=get,raw,dataset,mlcommons-cognata
+```
+
+### Find dataset in CM cache
+
+```bash
+cm show cache --tags=dataset,mlcommons-cognata
+
+cm rm cache --tags=dataset,mlcommons-cognata
+```
+
+### Download dataset to some local directory
+
+```bash
+cm run script --tags=get,raw,dataset,mlcommons-cognata --path=${HOME}/datasets/cognata -j
+cm run script --tags=get,raw,dataset,mlcommons-cognata --path=%userprofile%\datasets\cognata -j
+cm run script --tags=get,raw,dataset,mlcommons-cognata --path=D:\Work2\cognata-downloaded -j
+```
+
+### Download subsets of this dataset
+
+```bash
+cm run script --tags=get,raw,dataset,mlcommons-cognata --serial_numbers=10002_Urban_Clear_Morning
+cm run script --tags=get,raw,dataset,mlcommons-cognata --serial_numbers=10002_Urban_Clear_Morning --group_names=Cognata_Camera_01_8M
+cm run script --tags=get,raw,dataset,mlcommons-cognata --serial_numbers=10002_Urban_Clear_Morning --group_names=Cognata_Camera_01_8M --file_names=Cognata_Camera_01_8M_ann.zip;Cognata_Camera_01_8M_ann_laneline.zip;Cognata_Camera_01_8M.zip
+```
+
+Compact way to download the ABTF demo dataset to the CM cache:
+
+```bash
+cm run script --tags=get,raw,dataset,mlcommons-cognata,_abtf-demo
+```
+
+or to a specific path:
+```bash
+cm run script --tags=get,raw,dataset,mlcommons-cognata,_abtf-demo --path=./cognata
+cm run script --tags=get,raw,dataset,mlcommons-cognata,_abtf-demo --path=.\cognata
+```
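+
+For programmatic use, the same script can be driven from Python through the CM API. Below is a minimal sketch, assuming the `cmind` package is installed; reading the dataset location from `new_env` follows this script's `new_env_keys` declaration (`CM_DATASET_MLCOMMONS_COGNATA*`):
+
+```python
+import cmind
+
+# Run the Cognata dataset script (equivalent to `cm run script --tags=...`).
+r = cmind.access({'action': 'run',
+                  'automation': 'script',
+                  'tags': 'get,raw,dataset,mlcommons-cognata,_abtf-demo',
+                  'quiet': True})
+if r['return'] > 0:
+    raise RuntimeError(r.get('error', 'CM script failed'))
+
+# Scripts export the env keys listed under new_env_keys to the caller.
+print(r['new_env'].get('CM_DATASET_MLCOMMONS_COGNATA_PATH'))
+```
diff --git 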
a/cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/_cm.yaml new file mode 100644 index 000000000..1b8155d7b --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/_cm.yaml @@ -0,0 +1,161 @@ +# Written by Grigori Fursin + +alias: get-dataset-cognata-mlcommons +uid: 464ce21f2dce464e + +automation_alias: script +automation_uid: 5b4e0237da074764 + +developers: "Grigori Fursin, Radoyeh Shojaei" + +tags: +- get +- raw +- dataset +- cognata +- mlcommons-cognata +- ml-task--object-detection +- ml-task--image-segmentation + +min_cm_version: '2.2.0' + +private: true + +cache: false + +category: AI/ML datasets +category_sort: 8500 + + +input_mapping: + update: CM_DATASET_MLCOMMONS_COGNATA_UPDATE + import: CM_DATASET_MLCOMMONS_COGNATA_IMPORT_PATH + private_url: CM_DATASET_MLCOMMONS_COGNATA_PRIVATE_URL + serial_numbers: CM_DATASET_MLCOMMONS_COGNATA_SERIAL_NUMBERS + group_names: CM_DATASET_MLCOMMONS_COGNATA_GROUP_NAMES + file_names: CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES + +env: + CM_DATASET: MLCOMMONS_COGNATA + CM_DATASET_MLCOMMONS_COGNATA_KEY1: "Dataset 1.0" + + + + +deps: +# Prepare dummy CM cache entry to manage dataset +- names: + - custom-cache-entry-mlcommons-cognata-dataset + tags: create,custom,cache,entry + extra_cache_tags: dataset,cognata,mlcommons-cognata + skip_if_env: + CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES: + - 'off' + env_key: DATASET_MLCOMMONS_COGNATA + # this script will prepare env CM_CUSTOM_CACHE_ENTRY_{env_key}_PATH + + +prehook_deps: +- names: + - gdrive-downloader-cognata + skip_if_env: + CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES: + - 'on' + enable_if_env: + CM_DATASET_MLCOMMONS_COGNATA_IMPORTED: + - 'no' + CM_DATASET_MLCOMMONS_COGNATA_DOWNLOAD_TOOL: + - gdrive + tags: download,file,_gdown,_url.https://drive.google.com/drive/folders/1FS-qLbzB5htgMnfry6z4gx8J_ZH_7MsJ?usp=drive_link + env: + CM_DOWNLOAD_EXTRA_OPTIONS: " --folder" + CM_DOWNLOAD_FILENAME: 10002_Urban_Clear_Morning + CM_DOWNLOAD_FINAL_ENV_NAME: CM_CUSTOM_CACHE_ENTRY_DATASET_MLCOMMONS_COGNATA_PATH + force_cache: true + extra_cache_tags: abtf,cognata,poc,dataset + +- names: + - rclone-downloader-cognata + skip_if_env: + CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES: + - 'on' + enable_if_env: + CM_DATASET_MLCOMMONS_COGNATA_IMPORTED: + - 'no' + CM_DATASET_MLCOMMONS_COGNATA_DOWNLOAD_TOOL: + - rclone + tags: download-and-extract,file,_extract,_rclone,_url.https://automotive.mlcommons-storage.org/Cognata_Dataset_PoC_Demo%2F10002_Urban_Clear_Morning.zip + env: + CM_RCLONE_COPY_USING: copyurl + CM_RCLONE_CONFIG_CMD: '' + CM_DOWNLOAD_CHECKSUM: '76389b05b0ee1e08d354d3c1b696b8c0' + CM_EXTRACT_EXTRACTED_CHECKSUM_FILE: "<<>>" + CM_DOWNLOAD_PATH: <<>> + CM_EXTRACT_PATH: <<>> + CM_EXTRACT_EXTRACTED_FILENAME: 10002_Urban_Clear_Morning + CM_DAE_FINAL_ENV_NAME: CM_CUSTOM_CACHE_ENTRY_DATASET_MLCOMMONS_COGNATA_PATH + force_cache: true + extra_cache_tags: abtf,cognata,poc,dataset + +- names: + - python + - python3 + tags: get,python3 + skip_if_env: + CM_DATASET_MLCOMMONS_COGNATA_IMPORTED: + - 'yes' + enable_if_env: + CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES: + - 'on' + +# Python package to read/write Excel files +- tags: get,generic-python-lib,_package.openpyxl + skip_if_env: + CM_DATASET_MLCOMMONS_COGNATA_IMPORTED: + - 'yes' + enable_if_env: + CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES: + - 'on' + +# Tool to download large files +- tags: get,aria2 + skip_if_env: + CM_DATASET_MLCOMMONS_COGNATA_IMPORTED: + - 'yes' + enable_if_env: + 
CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES: + - 'on' + + +variations: + abtf-demo: + group: dataset-type + env: + CM_DATASET_MLCOMMONS_COGNATA_SERIAL_NUMBERS: "10002_Urban_Clear_Morning" + CM_DATASET_MLCOMMONS_COGNATA_GROUP_NAMES: "Cognata_Camera_01_8M" + CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES: "Cognata_Camera_01_8M_ann.zip;Cognata_Camera_01_8M_ann_laneline.zip;Cognata_Camera_01_8M.zip" + + abtf-poc: + group: dataset-type + default: true + env: + CM_DATASET_MLCOMMONS_COGNATA_SERIAL_NUMBERS: "10002_Urban_Clear_Morning" + CM_DATASET_MLCOMMONS_COGNATA_GROUP_NAMES: "Cognata_Camera_01_8M" + CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES: "" + + rclone: + group: download-tool + default: true + env: + CM_DATASET_MLCOMMONS_COGNATA_DOWNLOAD_TOOL: rclone + + gdrive: + group: download-tool + env: + CM_DATASET_MLCOMMONS_COGNATA_DOWNLOAD_TOOL: gdrive + +new_env_keys: +- CM_DATASET_MLCOMMONS_COGNATA* + +print_env_at_the_end: + CM_DATASET_MLCOMMONS_COGNATA_PATH: Path to Cognata dataset diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/checksums/cognata_poc.txt b/cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/checksums/cognata_poc.txt new file mode 100644 index 000000000..b119faf77 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/checksums/cognata_poc.txt @@ -0,0 +1,41 @@ +9791a229f8fcd33de5db5ad5a4d8ca93 ./Cognata_Camera_01_8M_ann/0000001666.csv +eb03eead214fad6e9a8964b9b0fdb2a6 ./Cognata_Camera_01_8M_ann/0000003333.csv +72fa733da768e725cd562fd4472ada05 ./Cognata_Camera_01_8M_ann/0000005000.csv +5b30693ca5a339d68244ab5eaf1bb13a ./Cognata_Camera_01_8M_ann/0000006666.csv +644060ed86d5ac1ae58e25dc2762f294 ./Cognata_Camera_01_8M_ann/0000008333.csv +20cf4818e7b68f50101bf19614c36fee ./Cognata_Camera_01_8M_ann/0000010000.csv +056637bd394a898899445bd9d9d638c4 ./Cognata_Camera_01_8M_ann/0000011666.csv +5c092bdb26838c22e6c970b85838a8c6 ./Cognata_Camera_01_8M_ann/0000013333.csv +58c0b653fc17e74d590e6a8448f37f20 ./Cognata_Camera_01_8M_ann/0000015000.csv +866628a72aeda956ee2c994a06efd67e ./Cognata_Camera_01_8M_ann/0000016666.csv +20181f8d8fb36616974355016292807c ./Cognata_Camera_01_8M_ann/0000018333.csv +0433e6949bea924a6329c17ca9125971 ./Cognata_Camera_01_8M_ann/0000020000.csv +eb910004272cab64fc41a0dfdf521ca8 ./Cognata_Camera_01_8M_ann/0000021666.csv +9c26c4a7360f3aad89bfea862d1bac93 ./Cognata_Camera_01_8M_ann/0000023333.csv +2f4a7ea573a26a59cb740a4072a8fe71 ./Cognata_Camera_01_8M_ann/0000025000.csv +398b5768046964d141d555ad313f9f47 ./Cognata_Camera_01_8M_ann/0000026666.csv +8c0c7fcef25efb87c041d785ee0d87a6 ./Cognata_Camera_01_8M_ann/0000028333.csv +43bf64024584aa1ca42738517a347599 ./Cognata_Camera_01_8M_ann/0000030000.csv +545dadaafca21841fcfd78404e7da7ba ./Cognata_Camera_01_8M_ann/0000031666.csv +77357f1b417fcd548be949a8b8d9131a ./Cognata_Camera_01_8M_ann/0000033333.csv +6b7de80e2b6114645c1a039761a0422b ./Cognata_Camera_01_8M_png/0000001666.png +776e04bb64d2d782012b6923bec62ae6 ./Cognata_Camera_01_8M_png/0000003333.png +aad25fa016258b71490299c53d588f32 ./Cognata_Camera_01_8M_png/0000005000.png +b7acf5249e09817f8d82469737c016bd ./Cognata_Camera_01_8M_png/0000006666.png +6bf8031926f4b3d6b82b30e7055855e5 ./Cognata_Camera_01_8M_png/0000008333.png +c3248feec77175811e8a85e6c88d5424 ./Cognata_Camera_01_8M_png/0000010000.png +ae1f50d358940d335ae89193bd78aca8 ./Cognata_Camera_01_8M_png/0000011666.png +1b98c42fd12819d14e980b72518ddb88 ./Cognata_Camera_01_8M_png/0000013333.png +b3d9b5a2fc5abffbebb5b63b2e1cce0a ./Cognata_Camera_01_8M_png/0000015000.png 
+3af5f660ed930d853a048a10a715104a ./Cognata_Camera_01_8M_png/0000016666.png +a52ae7aba6c56d6ef0d4d29f4a8267cb ./Cognata_Camera_01_8M_png/0000018333.png +9388222ca6e65beae42cf4a2f4b1d020 ./Cognata_Camera_01_8M_png/0000020000.png +a5d4d3312f5592d64b57b69a0a0edcea ./Cognata_Camera_01_8M_png/0000021666.png +35af823a8177abef9b72846a93063695 ./Cognata_Camera_01_8M_png/0000023333.png +75eb93f99c36135a16df23612c5802d2 ./Cognata_Camera_01_8M_png/0000025000.png +d9a86566fbcaead7f4a43659723014e7 ./Cognata_Camera_01_8M_png/0000026666.png +0ae5d8933fdb9a86e01e9192d2210340 ./Cognata_Camera_01_8M_png/0000028333.png +8b3775e07f902bb3644f7b1eb9de5a69 ./Cognata_Camera_01_8M_png/0000030000.png +5d2584af6566683784e78f7c71968fa7 ./Cognata_Camera_01_8M_png/0000031666.png +ed9f9570448a8fd3af8540169f0df6df ./Cognata_Camera_01_8M_png/0000033333.png +d17ff83e2bbbf012a54ecac2491144a7 ./demo_files.txt diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/customize.py b/cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/customize.py new file mode 100644 index 000000000..d4791c60a --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/customize.py @@ -0,0 +1,449 @@ +from cmind import utils +import os +import json + + +def preprocess(i): + + env = i['env'] + + cm_cache_dataset_path = env.get( + 'CM_CUSTOM_CACHE_ENTRY_DATASET_MLCOMMONS_COGNATA_PATH', '').strip() + cfg = utils.safe_load_json(cm_cache_dataset_path, 'cfg.json')['meta'] + if cfg.get('imported', False): + env['CM_DATASET_MLCOMMONS_COGNATA_IMPORTED'] = 'yes' + + if env.get('CM_ABTF_SCRATCH_PATH_DATASETS', '') != '': + env['CM_ABTF_SCRATCH_PATH_DATASET_COGNATA'] = os.path.join( + env['CM_ABTF_SCRATCH_PATH_DATASETS'], "cognata") + env['CM_ABTF_SCRATCH_PATH_DATASET_COGNATA_TMP'] = os.path.join( + env['CM_ABTF_SCRATCH_PATH_DATASETS'], "cognata_tmp") + + env['CM_DATASET_COGNATA_POC_TEXT_MD5_FILE_PATH'] = os.path.join( + i['run_script_input']['path'], 'checksums', 'cognata_poc.txt') + + # Check if user requests path not in CM cache + # + # --path (env CM_TMP_PATH) shows where to store Cognata data set instead of CM cahe + # --import tells CM to import existing Cognata from a given path and skip further download/processing + # + import_path = env.get( + 'CM_DATASET_MLCOMMONS_COGNATA_IMPORT_PATH', + '').strip() + if import_path != '': + if not os.path.isdir(import_path): + return {'return': 1, 'error': 'directory to import this dataset doesn\'t exist: {}'.format( + import_path)} + + env['CM_DATASET_MLCOMMONS_COGNATA_IMPORTED'] = 'yes' + env['CM_DATASET_MLCOMMONS_COGNATA_PATH'] = import_path + + else: + path = env.get('CM_TMP_PATH', '') + if path != '': + env['CM_DATASET_MLCOMMONS_COGNATA_IMPORTED'] = 'no' + + if not os.path.isdir(path): + os.makedirs(path) + + env['CM_DATASET_MLCOMMONS_COGNATA_PATH'] = path + + return {'return': 0} + + +def postprocess(i): + + env = i['env'] + + automation = i['automation'] + cm = automation.cmind + + cur_dir = os.getcwd() + + quiet = (env.get('CM_QUIET', False) == 'yes') + + cm_cache_dataset_path = env.get( + 'CM_CUSTOM_CACHE_ENTRY_DATASET_MLCOMMONS_COGNATA_PATH', '').strip() + + if not os.path.isdir(cm_cache_dataset_path): + return { + 'return': 1, 'error': 'Dataset corrupted - CM cache path not found: {}'.format(cm_cache_dataset_path)} + + if env.get('CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES', '') == '': + env['CM_DATASET_MLCOMMONS_COGNATA_PATH'] = os.path.dirname( + env['CM_CUSTOM_CACHE_ENTRY_DATASET_MLCOMMONS_COGNATA_PATH']) + env['CM_GET_DEPENDENT_CACHED_PATH'] = 
env['CM_DATASET_MLCOMMONS_COGNATA_PATH'] + return {'return': 0} + + cm_cache_dataset_cfg_file = os.path.join(cm_cache_dataset_path, 'cfg.json') + env['CM_DATASET_MLCOMMONS_COGNATA_CFG_FILE'] = cm_cache_dataset_cfg_file + + cfg = utils.safe_load_json('', cm_cache_dataset_cfg_file)['meta'] + + dataset_path = cfg.get('real_path', '') + dataset_path_requested = env.get('CM_DATASET_MLCOMMONS_COGNATA_PATH', '') + if dataset_path == '': + if dataset_path_requested != '': + dataset_path = dataset_path_requested + else: + dataset_path = os.path.join(cm_cache_dataset_path, 'cognata') + else: + if dataset_path_requested != '': + dataset_path = dataset_path_requested + + cfg['real_path'] = dataset_path + + print('') + print('Used dataset path: {}'.format(dataset_path)) + + env['CM_DATASET_MLCOMMONS_COGNATA_PATH'] = dataset_path + + # If imported, don't process further + if env.get('CM_DATASET_MLCOMMONS_COGNATA_IMPORTED', '') == 'yes': + cfg['imported'] = True + else: + cfg['imported'] = False + + utils.save_json(cm_cache_dataset_cfg_file, cfg) + + if cfg.get('imported', False): + return {'return': 0} + + # If processed once, don't process unless forced + if cfg.get('processed', False): + if not utils.check_if_true_yes_on( + env, 'CM_DATASET_MLCOMMONS_COGNATA_UPDATE'): + print('') + print('Already processed: use --update to update this dataset') + + return {'return': 0} + + # First level dir + dataset_path1 = dataset_path + + if not os.path.isdir(dataset_path1): + os.makedirs(dataset_path1) + + # Check if has license and download URL + dataset_path_secret = os.path.join(dataset_path1, 'secret.json') + + first_url = '' + dataset_meta = {} + + if os.path.isfile(dataset_path_secret): + r = utils.load_json(dataset_path_secret) + if r['return'] > 0: + return r + + dataset_meta = r['meta'] + + first_url = dataset_meta.get('first_url', '').strip() + + if first_url == '': + x = env.get('CM_DATASET_MLCOMMONS_COGNATA_PRIVATE_URL', '').strip() + if x != '': + first_url = x + else: + print('') + first_url = input( + 'Please register at https://mlcommons.org/datasets/cognata and enter private URL: ') + + first_url = first_url.strip() + + if first_url == '': + return {'return': 1, + 'error': 'Private MLCommons Cognata URL was not provided'} + + dataset_meta['first_url'] = first_url + + with open(dataset_path_secret, 'w') as f: + f.write(json.dumps(dataset_meta, indent=2) + '\n') + + ########################################################################## + # Check if first.xlsx exists + file_first_xlsx = 'first.xlsx' + first_xlsx = os.path.join(dataset_path1, file_first_xlsx) + + if not os.path.isfile(first_xlsx): + # Attempting to download file + first_url_export, dummy = google_url_for_export(first_url) + + if first_url_export == '': + return { + 'return': 1, 'error': 'can\'t parse URL for export: {}'.format(first_url)} + + r = cm.access({'action': 'run', + 'automation': 'script', + 'tags': 'download,file,_wget', + 'verify': 'no', + 'url': first_url_export, + 'output_file': file_first_xlsx, + 'store': dataset_path1}) + if r['return'] > 0: + return r + + if not os.path.isfile(first_xlsx): + return {'return': 1, + 'error': 'File {} was not downloaded'.format(first_xlsx)} + + ########################################################################## + # Parse XLSX and check serial number + serial_numbers = [] + for s in env.get( + 'CM_DATASET_MLCOMMONS_COGNATA_SERIAL_NUMBERS', '').strip().split(','): + s = s.strip() + if s != '' and s not in serial_numbers: + serial_numbers.append(s) + + dataset_key = 
env['CM_DATASET_MLCOMMONS_COGNATA_KEY1']
+    url_key = 'Link to Excel File (Download Links)'
+    serial_key = 'Serial Number'
+
+    r = process_xlsx(
+        first_xlsx,
+        dataset_key,
+        url_key,
+        serial_key,
+        serial_numbers)
+    if r['return'] > 0:
+        return r
+
+    headers = r['headers']
+    data = r['data']
+    all_data = r['all_data']
+
+    if len(all_data) != 0:
+        file_first_json = 'first.json'
+        first_json = os.path.join(dataset_path1, file_first_json)
+
+        if not os.path.isfile(first_json):
+            with open(first_json, 'w') as f:
+                f.write(json.dumps(all_data, indent=2) + '\n')
+
+    if len(data) == 0:
+        return {'return': 1, 'error': 'no sets found'}
+
+    ##########################################################################
+    print('')
+    print('Available or selected serial numbers (use --serial_numbers=a,b,c to download specific subsets):')
+    print('')
+    for d in data:
+        s = d[serial_key]
+        print(s)
+
+    for d in data:
+        url = d[url_key]
+        url_export, dummy = google_url_for_export(url)
+
+        serial_file = d[serial_key] + '.xlsx'
+
+        dataset_path2 = os.path.join(dataset_path1, serial_file)
+        dataset_path3 = os.path.join(dataset_path1, d[serial_key])
+
+        if not os.path.isdir(dataset_path3):
+            os.makedirs(dataset_path3)
+
+        if not os.path.isfile(dataset_path2):
+
+            print('')
+            print('Downloading {} ...'.format(url_export))
+
+            r = cm.access({'action': 'run',
+                           'automation': 'script',
+                           'tags': 'download,file,_wget',
+                           'verify': 'no',
+                           'url': url_export,
+                           'output_file': serial_file,
+                           'store': dataset_path1})
+            if r['return'] > 0:
+                return r
+
+    ##########################################################################
+    print('')
+    print('Processing subsets ...')
+
+    group_names = []
+    for s in env.get('CM_DATASET_MLCOMMONS_COGNATA_GROUP_NAMES',
+                     '').strip().split(','):
+        s = s.strip()
+        if s != '' and s not in group_names:
+            group_names.append(s)
+
+    # Check if the user forced specific file names (';'-separated list)
+    x = env.get('CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES', '').strip()
+    file_names = []
+    if x != '':
+        file_names = x.split(';') if ';' in x else [x]
+
+    for d in data:
+        serial_file = d[serial_key] + '.xlsx'
+
+        dataset_path2 = os.path.join(dataset_path1, serial_file)
+        dataset_path3 = os.path.join(dataset_path1, d[serial_key])
+
+        print('')
+        print('Processing {} ...'.format(serial_file))
+
+        dataset_key = 'File_Data'
+        url_key = 'File_Link'
+        serial_key = 'Group_Name'
+
+        r = process_xlsx(
+            dataset_path2,
+            dataset_key,
+            url_key,
+            serial_key,
+            group_names)
+        if r['return'] > 0:
+            return r
+
+        headers = r['headers']
+        data = r['data']
+        all_data = r['all_data']
+
+        if len(all_data) != 0:
+            file_all_json = 'all.json'
+            all_json = os.path.join(dataset_path3, file_all_json)
+
+            if not os.path.isfile(all_json):
+                with open(all_json, 'w') as f:
+                    f.write(json.dumps(all_data, indent=2) + '\n')
+
+        if len(data) == 0:
+            return {'return': 1, 'error': 'no sub-sets found'}
+
+        for d in data:
+            file_name = d['File_Name']
+
+            if len(file_names) > 0 and file_name not in file_names:
+                continue
+
+            file_name_with_path = os.path.join(dataset_path3, file_name)
+            file_name_with_path_done = os.path.join(
+                dataset_path3, file_name) + '.done'
+
+            url = d[url_key]
+
+            print('')
+            print('Downloading {} ...'.format(file_name))
+
+            if os.path.isfile(file_name_with_path_done):
+                print('')
+                print(' Already processed - skipping ...')
+                continue
+
+            if os.name == 'nt':
+                aria2_tool = env['CM_ARIA2_BIN_WITH_PATH']
+            else:
+                aria2_tool = 'aria2c'
+
+            cmd = aria2_tool + \
+                ' --async-dns=false -x15 -s15 "{}" --dir "{}" -o "{}"'.format(
+                    url,
+                    dataset_path3, file_name)
+
+            print('')
+            print(cmd)
+            print('')
+
+            os.system(cmd)
+
+            # Unarchive
+            print('')
+            print('Extracting file {} ...'.format(file_name_with_path))
+            print('')
+
+            if file_name.endswith('.zip'):
+
+                import zipfile
+                extractor = zipfile.ZipFile(file_name_with_path, "r")
+
+            elif file_name.endswith('.tar'):
+
+                import tarfile
+                extractor = tarfile.open(file_name_with_path, "r")
+
+            else:
+                extractor = None
+
+            if extractor is not None:
+
+                try:
+                    extractor.extractall(dataset_path3)
+                    extractor.close()
+
+                except Exception as e:
+                    return {'return': 1,
+                            'error': 'extracting failed: {}'.format(e)}
+
+            # Mark as downloaded
+            with open(file_name_with_path_done, 'w') as f:
+                f.write('DONE\n')
+
+            # Remove file
+            os.remove(file_name_with_path)
+
+        print('')
+
+    # Mark that processed this dataset once correctly
+    cfg['processed'] = True
+    utils.save_json(cm_cache_dataset_cfg_file, cfg)
+
+    env['CM_GET_DEPENDENT_CACHED_PATH'] = env['CM_DATASET_MLCOMMONS_COGNATA_PATH']
+
+    return {'return': 0}
+
+
+# Prepare Google URL for export
+def google_url_for_export(url):
+    url2 = ''
+
+    j = url.rfind('/')
+
+    if j > 0:
+        url = url[:j + 1]
+        url2 = url + 'export'
+
+    return (url2, url)
+
+# Parse a Cognata XLSX sheet and filter rows by serial/group keys
+
+
+def process_xlsx(filename, dataset_key, url_key, serial_key, serial_numbers):
+    import openpyxl
+
+    ex = openpyxl.load_workbook(filename)
+
+    sets = ex[dataset_key]
+
+    headers = {}
+
+    data = []
+    all_data = []
+
+    for row in sets.iter_rows(values_only=True):
+        lrow = list(row)
+
+        # The first row of the sheet provides the column headers
+        if len(headers) == 0:
+            for j in range(0, len(lrow)):
+                headers[j] = str(lrow[j]).strip()
+        else:
+            xrow = {}
+
+            for j in range(0, len(lrow)):
+                xrow[headers[j]] = lrow[j]
+
+            url = str(xrow.get(url_key, ''))
+            if 'https' in url:
+                all_data.append(xrow)
+
+            if len(serial_numbers) > 0:
+                serial_number = xrow.get(serial_key, '')
+
+                if serial_number not in serial_numbers:
+                    continue
+
+            if url != '':
+                data.append(xrow)
+
+    return {'return': 0, 'headers': headers,
+            'data': data, 'all_data': all_data}
diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-igbh/COPYRIGHT.md b/cmx4mlops/cmx4mlops/repo/script/get-dataset-igbh/COPYRIGHT.md
new file mode 100644
index 000000000..a059b0c49
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-igbh/COPYRIGHT.md
@@ -0,0 +1,9 @@
+# Copyright Notice
+
+© 2024-2025 MLCommons. All Rights Reserved.
+
+This file is licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. A copy of the License can be obtained at:
+
+[Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is provided on an "AS IS" basis, without warranties or conditions of any kind, either express or implied. Please refer to the License for the specific language governing permissions and limitations under the License.
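The `checksums/cognata_poc.txt` manifest introduced above follows the `md5sum` output convention (one `<md5hex> <relative path>` pair per line), and `customize.py` exports its location as `CM_DATASET_COGNATA_POC_TEXT_MD5_FILE_PATH`. A minimal standalone sketch of validating a downloaded tree against such a manifest, using only the Python standard library (the `verify_manifest` helper below is illustrative, not part of this patch):

```python
import hashlib
import os
import sys


def verify_manifest(manifest_path, dataset_root):
    """Return relative paths whose MD5 differs from the manifest entry."""
    mismatched = []
    with open(manifest_path) as manifest:
        for line in manifest:
            parts = line.split(None, 1)
            if len(parts) != 2:
                continue  # skip blank or malformed lines
            expected_md5, rel_path = parts[0], parts[1].strip()
            md5 = hashlib.md5()
            with open(os.path.join(dataset_root, rel_path), 'rb') as f:
                for chunk in iter(lambda: f.read(1 << 20), b''):
                    md5.update(chunk)
            if md5.hexdigest() != expected_md5:
                mismatched.append(rel_path)
    return mismatched


if __name__ == '__main__':
    bad = verify_manifest(sys.argv[1], sys.argv[2])
    print('OK' if not bad else 'MISMATCH: ' + ', '.join(bad))
```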
diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-igbh/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-dataset-igbh/_cm.yaml new file mode 100644 index 000000000..8e5c7b4cd --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-igbh/_cm.yaml @@ -0,0 +1,448 @@ +alias: get-dataset-igbh +automation_alias: script +automation_uid: 5b4e0237da074764 +cache: true +tags: +- get +- dataset +- mlperf +- rgat +- igbh +- inference +uid: 824e61316c074253 +new_env_keys: + - CM_DATASET_IGBH_PATH + - CM_DATASET_IGBH_SIZE +input_mapping: + out_path: CM_DATASET_IGBH_OUT_PATH +env: + SKIP_USER_PROMPT: yes +deps: + - tags: mlperf,inference,source + names: + - inference-src + - tags: get,python + names: + - get-python + - tags: get,generic-python-lib,_package.igb,_url.git+https://github.com/gateoverflow/IGB-Datasets.git + - tags: get,generic-python-lib,_package.colorama + - tags: get,generic-python-lib,_package.tqdm + + +prehook_deps: + #paper + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper/node_feat.npy + CM_DOWNLOAD_CHECKSUM: 71058b9ac8011bafa1c5467504452d13 + CM_DOWNLOAD_FILENAME: node_feat.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ + extra_cache_tags: dataset,igbh,paper,node_feat + force_env_keys: + - CM_OUTDIRNAME + force_cache: true + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - paper-node-feat + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper/node_label_19.npy + CM_DOWNLOAD_CHECKSUM: be6fda45566e679bdb05ebea98ad16d4 + CM_DOWNLOAD_FILENAME: node_label_19.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ + extra_cache_tags: dataset,igbh,paper,node_label_19 + force_env_keys: + - CM_OUTDIRNAME + force_cache: true + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - paper-node-label19 + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper/node_label_2K.npy + CM_DOWNLOAD_CHECKSUM: 6eccab9a14f92f42be5b367c39002031 + CM_DOWNLOAD_FILENAME: node_label_2K.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ + extra_cache_tags: dataset,igbh,paper,node_label_2K + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - paper-node-label2k + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper/paper_id_index_mapping.npy + CM_DOWNLOAD_CHECKSUM: f70dd642a4f7e41d926c91c8c054fc4c + CM_DOWNLOAD_FILENAME: paper_id_index_mapping.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ + extra_cache_tags: dataset,igbh,paper,paper_id_index_mapping + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - paper-id-index-mapping + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + #paper_cites_paper + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper__cites__paper/edge_index.npy + CM_DOWNLOAD_CHECKSUM: f4897f53636c04a9c66f6063ec635c16 + CM_DOWNLOAD_FILENAME: edge_index.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/paper__cites__paper/ + extra_cache_tags: 
dataset,igbh,paper_cites_paper,edge_index + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - edge-index + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + # author + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/author/author_id_index_mapping.npy + CM_DOWNLOAD_CHECKSUM: 58c15aab7dae03bbd57e6a4ac5e61bd9 + CM_DOWNLOAD_FILENAME: author_id_index_mapping.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/author/ + extra_cache_tags: dataset,igbh,author,author_id_index_mapping + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - author-id-index-mapping + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/author/node_feat.npy + CM_DOWNLOAD_CHECKSUM: 2ec2512b554088381c04ec013e893c8d + CM_DOWNLOAD_FILENAME: node_feat.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/author/ + extra_cache_tags: dataset,igbh,author,node_feat + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - author-node-feat + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + # conference + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/conference/conference_id_index_mapping.npy + CM_DOWNLOAD_CHECKSUM: 0bf7c555d8c697b31b6af6c4cb6b6612 + CM_DOWNLOAD_FILENAME: conference_id_index_mapping.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/conference/ + extra_cache_tags: dataset,igbh,conference,conference_id_index_mapping + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - conference-id-index-mapping + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/conference/node_feat.npy + CM_DOWNLOAD_CHECKSUM: 898ff529b8cf972261fedd50df6377f8 + CM_DOWNLOAD_FILENAME: node_feat.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/conference/ + extra_cache_tags: dataset,igbh,conference,node_feat + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - conference-node-feat + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + # institute + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/institute/institute_id_index_mapping.npy + CM_DOWNLOAD_CHECKSUM: 03fb45eafb7bd35875ef4c7cd2a299a9 + CM_DOWNLOAD_FILENAME: institute_id_index_mapping.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/institute/ + extra_cache_tags: dataset,igbh,institute,institute_id_index_mapping + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - institute-id-index-mapping + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/institute/node_feat.npy + CM_DOWNLOAD_CHECKSUM: 12eaeced22d17b4e97d4b4742331c819 + CM_DOWNLOAD_FILENAME: node_feat.npy + CM_DOWNLOAD_PATH: 
<<>>/full/processed/institute/ + extra_cache_tags: dataset,igbh,institute,node_feat + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - institute-node-feat + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + # journal + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/journal/journal_id_index_mapping.npy + CM_DOWNLOAD_CHECKSUM: b630c20852b76d17a5c9c37b39176f69 + CM_DOWNLOAD_FILENAME: journal_id_index_mapping.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/journal/ + extra_cache_tags: dataset,igbh,journal,journal_id_index_mapping + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - journal-id-index-mapping + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/journal/node_feat.npy + CM_DOWNLOAD_CHECKSUM: 49d51b554b3004f10bee19d1c7f9b416 + CM_DOWNLOAD_FILENAME: node_feat.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/journal/ + extra_cache_tags: dataset,igbh,journal,node_feat + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - journal-node-feat + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + # fos + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/fos/fos_id_index_mapping.npy + CM_DOWNLOAD_CHECKSUM: 0f0cfde619361cde35d3be9f201d081a + CM_DOWNLOAD_FILENAME: fos_id_index_mapping.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/fos/ + extra_cache_tags: dataset,igbh,fos,fos_id_index_mapping + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - fos-id-index-mapping + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/fos/node_feat.npy + CM_DOWNLOAD_CHECKSUM: 3ef3df19e2475c387fec10bac82773df + CM_DOWNLOAD_FILENAME: node_feat.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/fos/ + extra_cache_tags: dataset,igbh,fos,node_feat + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - fos-node-feat + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + # author__affiliated_to__institute + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/author__affiliated_to__institute/edge_index.npy + CM_DOWNLOAD_CHECKSUM: e35dba208f81e0987207f78787c75711 + CM_DOWNLOAD_FILENAME: edge_index.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/author__affiliated_to__institute/ + extra_cache_tags: dataset,igbh,author_affiliated_to_institute,edge_index + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - author-to-institute-edge-index + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + # paper__published__journal + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper__published__journal/edge_index.npy + CM_DOWNLOAD_CHECKSUM: 
38505e83bde8e5cf94ae0a85afa60e13 + CM_DOWNLOAD_FILENAME: edge_index.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/paper__published__journal/ + extra_cache_tags: dataset,igbh,paper_published_journal,edge_index + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - paper-published-journal-edge-index + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + # paper__topic__fos + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper__topic__fos/edge_index.npy + CM_DOWNLOAD_CHECKSUM: 427fb350a248ee6eaa8c21cde942fda4 + CM_DOWNLOAD_FILENAME: edge_index.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/paper__topic__fos/ + extra_cache_tags: dataset,igbh,paper_topic_fos,edge_index + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - paper-topic-fos-edge-index + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + # paper__venue__conference + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper__venue__conference/edge_index.npy + CM_DOWNLOAD_CHECKSUM: 541b8d43cd93579305cfb71961e10a7d + CM_DOWNLOAD_FILENAME: edge_index.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/paper__venue__conference/ + extra_cache_tags: dataset,igbh,paper_venue_conference,edge_index + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - paper-venue-conference-edge-index + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + # paper__written_by__author + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper__written_by__author/edge_index.npy + CM_DOWNLOAD_CHECKSUM: df39fe44bbcec93a640400e6d81ffcb5 + CM_DOWNLOAD_FILENAME: edge_index.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/paper__written_by__author/ + extra_cache_tags: dataset,igbh,paper_written_by_author,edge_index + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - paper-written-by-author-edge-index + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL +variations: + debug: + default: true + group: dataset-type + env: + CM_DATASET_IGBH_TYPE: debug + CM_DATASET_IGBH_SIZE: tiny + full: + group: dataset-type + env: + CM_DATASET_IGBH_TYPE: full + CM_DATASET_IGBH_SIZE: full + glt: + env: + CM_IGBH_GRAPH_COMPRESS: yes + csc: + group: compressed-layout + default: true + env: + CM_IGBH_GRAPH_COMPRESS_LAYOUT: csc + csr: + group: compressed-layout + env: + CM_IGBH_GRAPH_COMPRESS_LAYOUT: csr diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-igbh/customize.py b/cmx4mlops/cmx4mlops/repo/script/get-dataset-igbh/customize.py new file mode 100644 index 000000000..de85bd900 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-igbh/customize.py @@ -0,0 +1,69 @@ +from cmind import utils +import os + + +def preprocess(i): + + os_info = i['os_info'] + + env = i['env'] + + if os_info['platform'] == "windows": + return {'return': 1, 'error': 'Script not supported in windows yet!'} + + print("Using MLCommons Inference source from '" + + env['CM_MLPERF_INFERENCE_SOURCE'] + "'") + + # run cmd + run_cmd = "" + graph_folder = os.path.join( + env['CM_MLPERF_INFERENCE_SOURCE'], 
'graph', 'R-GAT')
+
+    if env.get('CM_DATASET_IGBH_PATH',
+               '') != '':  # skip download, just register in cache
+        env['CM_DATASET_IGBH_OUT_PATH'] = env['CM_DATASET_IGBH_PATH']
+        return {'return': 0}
+
+    download_loc = env.get('CM_DATASET_IGBH_OUT_PATH', os.getcwd())
+
+    env['CM_DATASET_IGBH_DOWNLOAD_LOCATION'] = download_loc
+
+    run_cmd += f"cd {graph_folder} "
+    x_sep = " && "
+
+    # download the dataset
+    if env['CM_DATASET_IGBH_TYPE'] == "debug":
+        run_cmd += x_sep + env['CM_PYTHON_BIN_WITH_PATH'] + \
+            f" tools/download_igbh_test.py --target-path {download_loc} "
+
+    else:
+        env['CM_DATASET_IGBH_FULL_DOWNLOAD'] = 'yes'
+
+    # split seeds
+    run_cmd += x_sep + \
+        f"""{
+        env['CM_PYTHON_BIN_WITH_PATH']} tools/split_seeds.py --path {download_loc} --dataset_size {
+        env['CM_DATASET_IGBH_SIZE']} """
+
+    # compress the graph (for the glt implementation)
+    if env.get('CM_IGBH_GRAPH_COMPRESS', '') == "yes":
+        run_cmd += x_sep + \
+            f"""{env['CM_PYTHON_BIN_WITH_PATH']} tools/compress_graph.py --path {download_loc} --dataset_size {env['CM_DATASET_IGBH_SIZE']} --layout {env['CM_IGBH_GRAPH_COMPRESS_LAYOUT']}
+            """
+
+    env['CM_RUN_CMD'] = run_cmd
+
+    return {'return': 0}
+
+
+def postprocess(i):
+
+    env = i['env']
+
+    env['CM_DATASET_IGBH_PATH'] = env.get(
+        'CM_DATASET_IGBH_OUT_PATH', os.getcwd())
+
+    print(
+        f"Path to the IGBH dataset: {os.path.join(env['CM_DATASET_IGBH_PATH'], env['CM_DATASET_IGBH_SIZE'])}")
+
+    return {'return': 0}
diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-igbh/run.sh b/cmx4mlops/cmx4mlops/repo/script/get-dataset-igbh/run.sh
new file mode 100644
index 000000000..238652160
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-igbh/run.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+#CM Script location: ${CM_TMP_CURRENT_SCRIPT_PATH}
+
+#To export any variable
+#echo "VARIABLE_NAME=VARIABLE_VALUE" >>tmp-run-env.out
+
+#${CM_PYTHON_BIN_WITH_PATH} contains the path to python binary if "get,python" is added as a dependency
+
+
+
+function exit_if_error() {
+  test $? -eq 0 || exit $?
+} + +function run() { + echo "Running: " + echo "$1" + echo "" + eval "$1" + exit_if_error +} + +run "$CM_RUN_CMD" diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-imagenet-aux/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-dataset-imagenet-aux/_cm.yaml index 242b53abc..c5944aedf 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-dataset-imagenet-aux/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-imagenet-aux/_cm.yaml @@ -14,6 +14,8 @@ prehook_deps: extra_cache_tags: imagenet-aux,dataset-aux force_cache: true tags: download-and-extract,_extract,_wget + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-imagenet-calibration/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-dataset-imagenet-calibration/_cm.yaml index 741d7e205..7e499146a 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-dataset-imagenet-calibration/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-imagenet-calibration/_cm.yaml @@ -10,6 +10,8 @@ category: "AI/ML datasets" deps: - tags: download,file + force_env_keys: + - CM_OUTDIRNAME force_cache: true extra_cache_tags: imagenet-calibration,imagenet,calibration names: diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-imagenet-val/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-dataset-imagenet-val/_cm.yaml index 0b9923927..0a23afac1 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-dataset-imagenet-val/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-imagenet-val/_cm.yaml @@ -35,6 +35,8 @@ prehook_deps: env: CM_EXTRACT_TO_FOLDER: imagenet-2012-val tags: download-and-extract,file,_extract + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env: - CM_DAE_EXTRA_TAGS update_tags_from_env_with_prefix: diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-mlperf-inference-llama3/COPYRIGHT.md b/cmx4mlops/cmx4mlops/repo/script/get-dataset-mlperf-inference-llama3/COPYRIGHT.md new file mode 100644 index 000000000..a059b0c49 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-mlperf-inference-llama3/COPYRIGHT.md @@ -0,0 +1,9 @@ +# Copyright Notice + +© 2024-2025 MLCommons. All Rights Reserved. + +This file is licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. A copy of the License can be obtained at: + +[Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) + +Unless required by applicable law or agreed to in writing, software distributed under the License is provided on an "AS IS" basis, without warranties or conditions of any kind, either express or implied. Please refer to the License for the specific language governing permissions and limitations under the License. 
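Reviewer note: the `get-dataset-igbh` pieces above follow a common pattern in these scripts: `customize.py` assembles one `&&`-chained command line in `CM_RUN_CMD`, and `run.sh` echoes and executes it, aborting on the first failing step. A minimal Python sketch of that contract, for illustration only (the helper name and the sample commands are hypothetical):

```python
# Illustrative sketch of the CM_RUN_CMD contract used by run.sh above:
# echo the assembled command, execute it through a shell, and propagate
# a non-zero exit code, mirroring run()/exit_if_error() in bash.
import subprocess
import sys


def run(cmd: str) -> None:
    print("Running: ")
    print(cmd)
    print("")
    result = subprocess.run(cmd, shell=True)
    if result.returncode != 0:
        sys.exit(result.returncode)


# customize.py joins the download, split_seeds and compress_graph steps
# with " && ", so the first failure stops the whole pipeline.
run("echo download && echo split_seeds && echo compress_graph")
```

Chaining with `&&` keeps the failure semantics in the shell itself, so the wrapper only has to forward the final exit code.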
diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-mlperf-inference-llama3/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-dataset-mlperf-inference-llama3/_cm.yaml new file mode 100644 index 000000000..d8af83b88 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-mlperf-inference-llama3/_cm.yaml @@ -0,0 +1,56 @@ +alias: get-dataset-mlperf-inference-llama3 +automation_alias: script +automation_uid: 5b4e0237da074764 +cache: true +tags: +- get +- dataset +- mlperf +- llama3 +- inference +uid: c3bc69599cbc4db7 +new_env_keys: + - CM_DATASET_LLAMA3_PATH +input_mapping: + outdirname: CM_OUTDIRNAME +prehook_deps: + - env: + CM_DOWNLOAD_FINAL_ENV_NAME: CM_DATASET_LLAMA3_PATH + CM_EXTRACT_TO_FOLDER: llama-3-dataset + extra_cache_tags: dataset,llama3 + force_cache: true + enable_if_env: + CM_TMP_REQUIRE_DOWNLOAD: + - 'yes' + names: + - dae + tags: download-and-extract + update_tags_from_env_with_prefix: + _url.: + - CM_DOWNLOAD_URL +variations: + validation: + default: true + group: dataset-type + env: + CM_RCLONE_URL: mlc-inference:mlcommons-inference-wg-public/llama3_405b/mlperf_llama3.1_405b_dataset_8313_processed_fp16_eval.pkl + CM_DATASET_TYPE: validation + CM_DATASET_FILE_NAME: mlperf_llama3.1_405b_dataset_8313_processed_fp16_eval.pkl + calibration: + group: dataset-type + env: + CM_RCLONE_URL: mlc-inference:mlcommons-inference-wg-public/llama3_405b/mlperf_llama3.1_405b_calibration_dataset_512_processed_fp16_eval.pkl + CM_DATASET_TYPE: calibration + CM_DATASET_FILE_NAME: mlperf_llama3.1_405b_calibration_dataset_512_processed_fp16_eval.pkl + rclone: + add_deps_recursive: + dae: + tags: _rclone + default: true + env: + CM_DOWNLOAD_FILENAME: checkpoint + CM_DOWNLOAD_URL: <<>> + CM_RCLONE_CONFIG_NAME: mlc-inference + group: download-tool +print_env_at_the_end: + CM_DATASET_LLAMA3_PATH: Path to the dataset diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-mlperf-inference-llama3/customize.py b/cmx4mlops/cmx4mlops/repo/script/get-dataset-mlperf-inference-llama3/customize.py new file mode 100644 index 000000000..745dc52fe --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-mlperf-inference-llama3/customize.py @@ -0,0 +1,31 @@ +from cmind import utils +import os + + +def preprocess(i): + + os_info = i['os_info'] + + env = i['env'] + + if os_info['platform'] == "windows": + return {'return': 1, 'error': 'Script not supported in windows yet!'} + + if env.get('CM_DATASET_LLAMA3_PATH', '') == '': + env['CM_TMP_REQUIRE_DOWNLOAD'] = "yes" + + if env.get('CM_OUTDIRNAME', '') != '': + env['CM_DOWNLOAD_PATH'] = env['CM_OUTDIRNAME'] + + return {'return': 0} + + +def postprocess(i): + + env = i['env'] + + if env.get('CM_TMP_REQUIRE_DOWNLOAD', '') == "yes": + env['CM_DATASET_LLAMA3_PATH'] = os.path.join( + env['CM_DATASET_LLAMA3_PATH'], env['CM_DATASET_FILE_NAME']) + + return {'return': 0} diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-mlperf-inference-mixtral/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-dataset-mlperf-inference-mixtral/_cm.yaml index f8684eef5..566f7bb05 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-dataset-mlperf-inference-mixtral/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-mlperf-inference-mixtral/_cm.yaml @@ -8,9 +8,11 @@ new_env_keys: prehook_deps: - env: CM_DOWNLOAD_FINAL_ENV_NAME: CM_DATASET_PREPROCESSED_PATH - extra_cache_tags: mixtral,get-mixtral-dataset + extra_cache_tags: mixtral,get-mixtral-dataset force_cache: true tags: download-and-extract + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - 
CM_PACKAGE_URL diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-openimages-annotations/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-dataset-openimages-annotations/_cm.yaml index a96e7f58e..16158cef6 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-dataset-openimages-annotations/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-openimages-annotations/_cm.yaml @@ -12,6 +12,8 @@ prehook_deps: extra_cache_tags: retinanet,get,dataset-openimages-annotations force_cache: true tags: download-and-extract,_wget,_extract + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-openimages-calibration/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-dataset-openimages-calibration/_cm.yaml index b8bd73e12..6edd3716c 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-dataset-openimages-calibration/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-openimages-calibration/_cm.yaml @@ -10,6 +10,8 @@ category: "AI/ML datasets" deps: - tags: download,file + force_env_keys: + - CM_OUTDIRNAME force_cache: true extra_cache_tags: openimages-calibration,openimages,calibration names: diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-openorca/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-dataset-openorca/_cm.yaml index 861c49575..c860b0213 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-dataset-openorca/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-openorca/_cm.yaml @@ -15,6 +15,8 @@ deps: names: - openorca-src tags: get,git,repo,_lfs,_repo.https://huggingface.co/datasets/Open-Orca/OpenOrca + force_env_keys: + - CM_OUTDIRNAME env: CM_DATASET: OPENORCA new_env_keys: diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-squad-vocab/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-dataset-squad-vocab/_cm.yaml index a6ec2e902..aa1bad21c 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-dataset-squad-vocab/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-squad-vocab/_cm.yaml @@ -12,6 +12,8 @@ prehook_deps: extra_cache_tags: bert,get,dataset-squad-vocab force_cache: true tags: download-and-extract,_wget + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-squad/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-dataset-squad/_cm.yaml index a9dd6ed94..d47fc9ce3 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-dataset-squad/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-squad/_cm.yaml @@ -16,6 +16,8 @@ prehook_deps: extra_cache_tags: bert,get,dataset-squad force_cache: true tags: download-and-extract,_wget + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL diff --git a/cmx4mlops/cmx4mlops/repo/script/get-docker/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-docker/_cm.yaml index e2f33e875..881039852 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-docker/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-docker/_cm.yaml @@ -9,7 +9,8 @@ docker_input_mapping: {} input_description: {} input_mapping: {} new_env_keys: [ - "CM_DOCKER_VERSION" + "CM_DOCKER_VERSION", + "CM_CONTAINER_TOOL" ] new_state_keys: [] post_deps: [] diff --git a/cmx4mlops/cmx4mlops/repo/script/get-docker/customize.py b/cmx4mlops/cmx4mlops/repo/script/get-docker/customize.py index d84a8eaed..0be862b5c 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-docker/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/get-docker/customize.py @@ -58,8 +58,13 @@ def 
detect_version(i): version = r['version'] + tool = "docker" + + if "podman" in r['string'].lower(): + tool = "podman" + print(i['recursion_spaces'] + ' Detected version: {}'.format(version)) - return {'return': 0, 'version': version} + return {'return': 0, 'version': version, "tool": tool} def postprocess(i): @@ -71,6 +76,7 @@ def postprocess(i): return r version = r['version'] + tool = r['tool'] found_file_path = env['CM_DOCKER_BIN_WITH_PATH'] found_path = os.path.dirname(found_file_path) @@ -81,4 +87,6 @@ def postprocess(i): env['CM_DOCKER_VERSION'] = version + env['CM_CONTAINER_TOOL'] = tool + return {'return': 0, 'version': version} diff --git a/cmx4mlops/cmx4mlops/repo/script/get-generic-python-lib/customize.py b/cmx4mlops/cmx4mlops/repo/script/get-generic-python-lib/customize.py index edbb2d552..2f61bac02 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-generic-python-lib/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/get-generic-python-lib/customize.py @@ -200,4 +200,8 @@ def postprocess(i): if pip_version and len(pip_version) > 1 and int(pip_version[0]) >= 23: env['CM_PYTHON_PIP_COMMON_EXTRA'] = " --break-system-packages" + if version.count('.') > 1: + env[f"{env_version_key}_MAJOR_MINOR"] = ".".join( + version.split(".")[:2]) + return {'return': 0, 'version': version} diff --git a/cmx4mlops/cmx4mlops/repo/script/get-generic-python-lib/detect-version.py b/cmx4mlops/cmx4mlops/repo/script/get-generic-python-lib/detect-version.py index 001c39b37..fc879f04e 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-generic-python-lib/detect-version.py +++ b/cmx4mlops/cmx4mlops/repo/script/get-generic-python-lib/detect-version.py @@ -2,6 +2,7 @@ import sys package_name = os.environ.get('CM_GENERIC_PYTHON_PACKAGE_NAME', '') +package_name = package_name.split("[")[0] filename = 'tmp-ver.out' diff --git a/cmx4mlops/cmx4mlops/repo/script/get-generic-sys-util/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-generic-sys-util/_cm.yaml index 1d45c2c28..b75e24bbc 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-generic-sys-util/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-generic-sys-util/_cm.yaml @@ -212,21 +212,33 @@ variations: brew: '' dnf: boost-devel yum: boost-devel - libbz2-dev: + bzip2: env: - CM_SYS_UTIL_NAME: libbz2_dev + CM_SYS_UTIL_NAME: bzip2 CM_SYS_UTIL_VERSION_CMD_OVERRIDE: bzcat --version 2>&1 | grep bzip > tmp-ver.out CM_SYS_UTIL_VERSION_RE: ([0-9]+(\.[0-9]+)+) CM_TMP_VERSION_DETECT_GROUP_NUMBER: 1 new_env_keys: + - CM_BZIP2_VERSION + state: + bzip2: + apt: bzip2 + brew: bzip2 + dnf: bzip2 + yum: bzip2 + libbz2-dev: + env: + CM_SYS_UTIL_NAME: libbz2_dev + CM_SYS_UTIL_VERSION_CMD: dpkg -s libbz2-dev | grep 'Version' + CM_SYS_UTIL_VERSION_RE: ([0-9]+(\.[0-9]+)+) + CM_TMP_VERSION_DETECT_GROUP_NUMBER: 0 + new_env_keys: - CM_LIBBZ2_DEV_VERSION state: libbz2_dev: apt: libbz2-dev - brew: bzip2 dnf: libbzip2-devel yum: libbzip2-devel - zlib-devel: libbz2-devel libev-dev: env: CM_SYS_UTIL_NAME: libev_dev diff --git a/cmx4mlops/cmx4mlops/repo/script/get-gh-actions-runner/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-gh-actions-runner/_cm.yaml index 3008f6365..287ee254a 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-gh-actions-runner/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-gh-actions-runner/_cm.yaml @@ -6,6 +6,7 @@ can_force_cache: true tags: - get - gh +- github - actions-runner - runner-code - runner @@ -29,21 +30,27 @@ deps: variations: config: + group: command + default: true env: CM_GH_ACTIONS_RUNNER_COMMAND: config remove: + group: command env: 
CM_GH_ACTIONS_RUNNER_COMMAND: remove
   install:
+    group: command
     deps:
     - tags: get,gh,actions-runner,_config
       force_cache: yes
     env:
       CM_GH_ACTIONS_RUNNER_COMMAND: install
   uninstall:
+    group: command
     env:
       CM_GH_ACTIONS_RUNNER_COMMAND: uninstall
   start:
+    group: command
     deps:
     - tags: get,gh,actions-runner,_install
       force_cache: yes
diff --git a/cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/COPYRIGHT.md b/cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/COPYRIGHT.md
new file mode 100644
index 000000000..a059b0c49
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/COPYRIGHT.md
@@ -0,0 +1,9 @@
+# Copyright Notice
+
+© 2024-2025 MLCommons. All Rights Reserved.
+
+This file is licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. A copy of the License can be obtained at:
+
+[Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is provided on an "AS IS" basis, without warranties or conditions of any kind, either express or implied. Please refer to the License for the specific language governing permissions and limitations under the License.
diff --git a/cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/_cm.yaml
new file mode 100644
index 000000000..6643eb222
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/_cm.yaml
@@ -0,0 +1,25 @@
+alias: get-huggingface-cli
+automation_alias: script
+automation_uid: 5b4e0237da074764
+cache: false
+can_force_cache: true
+category: DevOps automation
+clean_files: []
+deps:
+- tags: detect,os
+- tags: get,generic-python-lib,_package.huggingface_hub[cli]
+tags:
+- get
+- huggingface
+- hf-cli
+- huggingface-cli
+- cli
+input_mapping:
+  token: CM_HF_TOKEN
+uid: e9488a272f1d4160
+variations:
+  with-login:
+    cache: true
+    force_cache: true
+    env:
+      CM_HF_DO_LOGIN: yes
diff --git a/cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/customize.py b/cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/customize.py
new file mode 100644
index 000000000..d9e63e42c
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/customize.py
@@ -0,0 +1,30 @@
+from cmind import utils
+import os
+
+
+def preprocess(i):
+    env = i['env']
+    if env.get('CM_HF_TOKEN', '') != '':
+        env['CM_HF_LOGIN_CMD'] = f"""git config --global credential.helper store && huggingface-cli login --token {env['CM_HF_TOKEN']} --add-to-git-credential
+"""
+    elif str(env.get('CM_HF_DO_LOGIN')).lower() in ["yes", "1", "true"]:
+        env['CM_HF_LOGIN_CMD'] = f"""git config --global credential.helper store && huggingface-cli login
+"""
+    return {'return': 0}
+
+
+def postprocess(i):
+    env = i['env']
+
+    r = i['automation'].parse_version({'match_text': r'huggingface_hub\s*version:\s*([\d.]+)',
+                                       'group_number': 1,
+                                       'env_key': 'CM_GITHUBCLI_VERSION',
+                                       'which_env': i['env']})
+    if r['return'] > 0:
+        return r
+
+    version = r['version']
+
+    print(i['recursion_spaces'] + '    Detected version: {}'.format(version))
+
+    return {'return': 0, 'version': version}
diff --git a/cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/run.bat b/cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/run.bat
new file mode 100644
index 000000000..464afe5c7
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/run.bat
@@ -0,0 +1,14 @@
+@echo off
+REM Check if the environment variable CM_HF_LOGIN_CMD is
defined and not empty +IF DEFINED CM_HF_LOGIN_CMD ( + echo %CM_HF_LOGIN_CMD% + call %CM_HF_LOGIN_CMD% + IF ERRORLEVEL 1 ( + echo Command failed with error code %ERRORLEVEL% + exit /b %ERRORLEVEL% + ) +) + +REM Run the Hugging Face CLI version command and save output +huggingface-cli version > tmp-ver.out + diff --git a/cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/run.sh b/cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/run.sh new file mode 100644 index 000000000..43d20f367 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/run.sh @@ -0,0 +1,7 @@ +#!/bin/bash +if [[ -n ${CM_HF_LOGIN_CMD} ]]; then + echo "${CM_HF_LOGIN_CMD}" + eval ${CM_HF_LOGIN_CMD} + test $? -eq 0 || exit $? +fi +huggingface-cli version > tmp-ver.out diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-3d-unet-kits19/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-3d-unet-kits19/_cm.yaml index 7dc7f5b06..658f306a7 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-3d-unet-kits19/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-3d-unet-kits19/_cm.yaml @@ -43,6 +43,8 @@ variations: extra_cache_tags: 3d-unet,medical-imaging force-cache: true tags: download,file,download-file,_wget + force_env_keys: + - CM_OUTDIRNAME env: CM_ML_MODEL_ACCURACY: '0.86170' CM_ML_MODEL_FILE: 3dunet_kits19_128x128x128_dynbatch.onnx @@ -60,6 +62,8 @@ variations: extra_cache_tags: 3d-unet,medical-imaging force-cache: true tags: download,file,download-file,_wget + force_env_keys: + - CM_OUTDIRNAME env: CM_ML_MODEL_ACCURACY: '0.86170' CM_ML_MODEL_FILE: 3dunet_kits19_pytorch.ptc @@ -73,6 +77,8 @@ variations: extra_cache_tags: 3d-unet,medical-imaging force-cache: true tags: download-and-extract,_wget,_extract + force_env_keys: + - CM_OUTDIRNAME env: CM_ML_MODEL_ACCURACY: '0.86170' CM_ML_MODEL_FILE: 3dunet_kits19_pytorch_checkpoint.pth @@ -92,6 +98,8 @@ variations: extra_cache_tags: 3d-unet,medical-imaging force-cache: true tags: download-and-extract,_wget,_extract + force_env_keys: + - CM_OUTDIRNAME env: CM_ML_MODEL_ACCURACY: '0.86170' CM_ML_MODEL_FILE: 3dunet_kits19_128x128x128.tf diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-abtf-ssd-pytorch/COPYRIGHT.md b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-abtf-ssd-pytorch/COPYRIGHT.md new file mode 100644 index 000000000..a059b0c49 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-abtf-ssd-pytorch/COPYRIGHT.md @@ -0,0 +1,9 @@ +# Copyright Notice + +© 2024-2025 MLCommons. All Rights Reserved. + +This file is licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. A copy of the License can be obtained at: + +[Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) + +Unless required by applicable law or agreed to in writing, software distributed under the License is provided on an "AS IS" basis, without warranties or conditions of any kind, either express or implied. Please refer to the License for the specific language governing permissions and limitations under the License. 
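Reviewer note: `run.sh` and `run.bat` above both capture `huggingface-cli version` into `tmp-ver.out`, which `customize.py` then parses via `parse_version`. A standalone approximation of that detection step, assuming only that `huggingface-cli` is on PATH (the regular expression is the one used in this diff):

```python
# Standalone sketch of the version detection in get-huggingface-cli:
# capture `huggingface-cli version` and extract the huggingface_hub
# version with the same regex that customize.py passes to parse_version.
import re
import subprocess

out = subprocess.run(["huggingface-cli", "version"],
                     capture_output=True, text=True).stdout
m = re.search(r"huggingface_hub\s*version:\s*([\d.]+)", out)
print("Detected version:", m.group(1) if m else "unknown")
```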
diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-abtf-ssd-pytorch/README-extra.md b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-abtf-ssd-pytorch/README-extra.md new file mode 100644 index 000000000..e08259617 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-abtf-ssd-pytorch/README-extra.md @@ -0,0 +1,5 @@ +# Example to import local model + +```bash +cm run script --tags=get,ml-model,abtf-ssd-pytorch,_local.test_8mp.pth +``` diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-abtf-ssd-pytorch/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-abtf-ssd-pytorch/_cm.yaml new file mode 100644 index 000000000..b346288d2 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-abtf-ssd-pytorch/_cm.yaml @@ -0,0 +1,174 @@ +# Written by Grigori Fursin + +alias: get-ml-model-abtf-ssd-pytorch +uid: b3750a4b222a485d + +automation_alias: script +automation_uid: 5b4e0237da074764 + +cache: true + +category: AI/ML models + +private: true + + +tags: +- get +- ml-model +- abtf-ssd-pytorch +- cmc + + +input_mapping: + model_code_git_url: CM_ABTF_MODEL_CODE_GIT_URL + model_code_git_branch: CM_ABTF_MODEL_CODE_GIT_BRANCH + + +default_env: + CM_ABTF_MODEL_CODE_GIT_URL: https://github.com/mlcommons/abtf-ssd-pytorch + CM_ABTF_MODEL_CODE_GIT_BRANCH: cognata + + +deps: + +- tags: detect,os + +- tags: get,git,repo + names: + - abtf-ssd-pytorch-git-repo + - abtf-ml-model-code-git-repo + skip_if_env: + CM_SKIP_MODEL_CODE_DOWNLOAD: + - 'yes' + env: + CM_GIT_AUTH: 'yes' + CM_GIT_CHECKOUT_PATH_ENV_NAME: CM_ABTF_SSD_PYTORCH + extra_cache_tags: abtf,ssd,pytorch,ml-model,cmc + update_tags_from_env_with_prefix: + _repo.: + - CM_ABTF_MODEL_CODE_GIT_URL + _branch.: + - CM_ABTF_MODEL_CODE_GIT_BRANCH + + +- tags: download,file + env: + CM_DOWNLOAD_CHECKSUM: <<>> + CM_DOWNLOAD_FINAL_ENV_NAME: CM_ML_MODEL_FILE_WITH_PATH + CM_DOWNLOAD_FILENAME: <<>> + CM_VERIFY_SSL: 'no' + force_cache: true + names: + - abtf-ml-model-weights + - abtf-ml-model-weights-download + skip_if_env: + CM_SKIP_MODEL_WEIGHTS_DOWNLOAD: + - 'yes' + update_tags_from_env_with_prefix: + _url.: + - CM_ML_MODEL_URL + + + +new_env_keys: +- CM_ML_MODEL_* + +print_env_at_the_end: + CM_ML_MODEL_FILE_WITH_PATH: Path to the ML model weights + CM_ML_MODEL_CODE_WITH_PATH: Path to the ML model code + + +variations: + e01: + env: + CM_ML_MODEL: abtf-ssd-pytorch + CM_ML_MODEL_CHECKSUM: 31d177228308bbe43917c912b01c2d67 + CM_ML_MODEL_DATASET: coco + CM_ML_MODEL_FILENAME: SSD_e1.pth + CM_ML_MODEL_IMAGE_HEIGHT: '300' + CM_ML_MODEL_IMAGE_WIDTH: '300' + CM_ML_MODEL_URL: https://www.dropbox.com/scl/fi/7nqt5z8gplgeaveo933eo/SSD_e1.pth?rlkey=7lyb4qs2hzg491bfprwcuvx54&dl=0 + group: model-weights + + e65: + env: + CM_ML_MODEL: abtf-ssd-pytorch + CM_ML_MODEL_CHECKSUM: f769eb0321ac7fc1c16f982db6131d2f + CM_ML_MODEL_DATASET: coco + CM_ML_MODEL_FILENAME: SSD_e65.pth + CM_ML_MODEL_IMAGE_HEIGHT: '300' + CM_ML_MODEL_IMAGE_WIDTH: '300' + CM_ML_MODEL_URL: https://www.dropbox.com/scl/fi/wkegl2qxvm8cefbqq00o3/SSD_e65.pth?rlkey=ez26jafjdcly665npl6pdqxl8&dl=0 + group: model-weights + + abtf-mvp: + env: + CM_ML_MODEL: abtf-ssd-pytorch + CM_ML_MODEL_CHECKSUM: 1ab66f523715f9564603626e94e59c8c + CM_ML_MODEL_DATASET: cognata + CM_ML_MODEL_FILENAME: baseline_8MP_ss_scales_all_ep60.pth + CM_ML_MODEL_IMAGE_SIZE: '8M' + CM_ML_MODEL_URL: https://www.dropbox.com/scl/fi/9un2i2169rgebui4xklnm/baseline_8MP_ss_scales_all_ep60.pth?rlkey=sez3dnjep4waa09s5uy4r3wmk&st=z859czgk&dl=0 + group: model-weights + + abtf-poc: + default_variations: + download-tool: rclone + env: + 
CM_ML_MODEL: abtf-ssd-pytorch + CM_ML_MODEL_CHECKSUM: 26845c3b9573ce115ef29dca4ae5be14 + CM_ML_MODEL_DATASET: cognata + CM_ML_MODEL_FILENAME: baseline_8MP_ss_scales_fm1_5x5_all_ep60.pth + CM_ML_MODEL_IMAGE_SIZE: '8M' + group: model-weights + + abtf-poc,gdrive: + env: + CM_ML_MODEL_URL: https://drive.google.com/file/d/1kfJR_bs54KONprVd51kZu0PYmmh1wZZa/view + + abtf-poc,rclone: + env: + CM_RCLONE_COPY_USING: copyurl + CM_ML_MODEL_URL: https://automotive.mlcommons-storage.org/SSD_ResNet50%2Fbaseline_8MP_ss_scales_fm1_5x5_all_ep60.pth + CM_RCLONE_CONFIG_CMD: '' + + + local.#: + env: + CM_ML_MODEL_FILENAME: '#' + CM_ML_MODEL_LOCAL: 'yes' + CM_SKIP_MODEL_WEIGHTS_DOWNLOAD: 'yes' + group: model-weights + + skip_weights: + default: true + env: + CM_SKIP_MODEL_WEIGHTS_DOWNLOAD: 'yes' + group: model-weights + + skip_code: + env: + CM_SKIP_MODEL_CODE_DOWNLOAD: 'yes' + + rclone: + group: download-tool + env: + CM_RCLONE_COPY_USING: copyurl + adr: + abtf-ml-model-weights-download: + tags: _rclone + + wget: + group: download-tool + default: true + adr: + abtf-ml-model-weights-download: + tags: _wget + gdown: + group: download-tool + env: + CM_DOWNLOAD_EXTRA_OPTIONS: " --fuzzy" + adr: + abtf-ml-model-weights-download: + tags: _gdown diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-abtf-ssd-pytorch/customize.py b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-abtf-ssd-pytorch/customize.py new file mode 100644 index 000000000..10a3a7f83 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-abtf-ssd-pytorch/customize.py @@ -0,0 +1,49 @@ +from cmind import utils +import os + + +def preprocess(i): + + os_info = i['os_info'] + + env = i['env'] + + if env.get('CM_ML_MODEL_LOCAL', '') == 'yes': + ml_model = env.get('CM_ML_MODEL_FILENAME', '') + if ml_model == '': + return {'return': 1, 'error': '_local.{model name.pth} is not specified'} + + if not os.path.isabs(ml_model): + ml_model = os.path.join( + env.get( + 'CM_TMP_CURRENT_PATH', + ''), + ml_model) + + if not os.path.isfile(ml_model): + return {'return': 1, + 'error': 'ML model {} is not found'.format(ml_model)} + + env['CM_ML_MODEL_FILE_WITH_PATH'] = ml_model + + return {'return': 0} + + +def postprocess(i): + + env = i['env'] + + if env.get('CM_ML_MODEL_FILE_WITH_PATH', '') == '': + env['CM_ML_MODEL_FILE_WITH_PATH'] = 'model-weights-skipped' + + env['CM_ML_MODEL_FILE'] = os.path.basename( + env['CM_ML_MODEL_FILE_WITH_PATH']) + + if env.get('CM_ABTF_SSD_PYTORCH', '') == '': + env['CM_ABTF_SSD_PYTORCH'] = 'model-code-skipped' + + env['CM_ML_MODEL_CODE_WITH_PATH'] = env['CM_ABTF_SSD_PYTORCH'] + + env['CM_GET_DEPENDENT_CACHED_PATH'] = env['CM_ML_MODEL_FILE_WITH_PATH'] + + return {'return': 0} diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-bert-large-squad/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-bert-large-squad/_cm.yaml index e5b4d11bb..e81819279 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-bert-large-squad/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-bert-large-squad/_cm.yaml @@ -13,6 +13,8 @@ new_env_keys: - CM_ML_MODEL* post_deps: - tags: get,dataset-aux,squad-vocab + force_env_keys: + - CM_OUTDIRNAME prehook_deps: - env: CM_DOWNLOAD_FINAL_ENV_NAME: CM_ML_MODEL_FILE_WITH_PATH @@ -25,6 +27,8 @@ prehook_deps: CM_ML_MODEL_BERT_PACKED: - 'yes' tags: download-and-extract + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-dlrm-terabyte/_cm.yaml 
b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-dlrm-terabyte/_cm.yaml index 6227a9a17..4d5c93f1b 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-dlrm-terabyte/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-dlrm-terabyte/_cm.yaml @@ -25,6 +25,8 @@ prehook_deps: names: - dae tags: download-and-extract + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-gptj/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-gptj/_cm.yaml index 25e8deca4..25b2ef981 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-gptj/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-gptj/_cm.yaml @@ -29,6 +29,8 @@ prehook_deps: names: - dae tags: download-and-extract + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_DOWNLOAD_URL @@ -159,6 +161,10 @@ variations: - python3 tags: get,python3 - tags: get,generic-python-lib,_package.safetensors + - tags: get,generic-python-lib,_torch + names: + - torch + - pytorch rclone: add_deps_recursive: dae: diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-gptj/run-nvidia.sh b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-gptj/run-nvidia.sh index 27e5a675c..b16ee45da 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-gptj/run-nvidia.sh +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-gptj/run-nvidia.sh @@ -17,5 +17,5 @@ export DOCKER_RUN_ARGS=" -v ${CM_NVIDIA_MLPERF_SCRATCH_PATH}:/mnt" make -C docker run LOCAL_USER=1 test $? -eq 0 || exit $? -${CM_PYTHON_BIN_WITH_PATH} ${CM_MLPERF_INFERENCE_NVIDIA_CODE_PATH}/code/gptj/tensorrt/onnx_tune.py --fp8-scalers-path=${CM_NVIDIA_MLPERF_SCRATCH_PATH}/models/GPTJ-6B/fp8-quantized-ammo/GPTJ-FP8-quantized/rank0.safetensors --scaler 1.005 --index 15 +PYTHONPATH='' ${CM_PYTHON_BIN_WITH_PATH} ${CM_MLPERF_INFERENCE_NVIDIA_CODE_PATH}/code/gptj/tensorrt/onnx_tune.py --fp8-scalers-path=${CM_NVIDIA_MLPERF_SCRATCH_PATH}/models/GPTJ-6B/fp8-quantized-ammo/GPTJ-FP8-quantized/rank0.safetensors --scaler 1.005 --index 15 test $? -eq 0 || exit $? 
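Reviewer note: the `run-nvidia.sh` change above prefixes the `onnx_tune.py` invocation with `PYTHONPATH=''`, presumably so that packages from the calling CM environment cannot shadow the site-packages of the interpreter being invoked. A hedged Python equivalent of scrubbing the variable for a single child process (the command below is a placeholder, not the real script arguments):

```python
# Clear PYTHONPATH for one subprocess only, leaving the parent
# environment untouched; equivalent to the PYTHONPATH='' prefix in bash.
import os
import subprocess

child_env = dict(os.environ)
child_env["PYTHONPATH"] = ""

subprocess.run(["python3", "-c", "import sys; print(sys.path[:3])"],
               env=child_env, check=True)
```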
diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-huggingface-zoo/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-huggingface-zoo/_cm.yaml index d10c3f448..b8235a57d 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-huggingface-zoo/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-huggingface-zoo/_cm.yaml @@ -35,9 +35,15 @@ uid: 53cf8252a443446a variations: clone-repo: deps: + - tags: get,hf-cli,_with-login + enable_if_env: + CM_HF_TOKEN: + - on - env: CM_GIT_CHECKOUT_PATH_ENV_NAME: CM_ML_MODEL_PATH tags: get,git,repo,_lfs + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _repo.https://huggingface.co/: - CM_MODEL_ZOO_STUB diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-llama2/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-llama2/_cm.yaml index 2ff45866b..fe082718e 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-llama2/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-llama2/_cm.yaml @@ -26,6 +26,8 @@ prehook_deps: names: - hf-zoo tags: get,ml-model,huggingface,zoo,_clone-repo + force_env_keys: + - CM_OUTDIRNAME print_env_at_the_end: LLAMA2_CHECKPOINT_PATH: LLAMA2 checkpoint path tags: diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-llama3/COPYRIGHT.md b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-llama3/COPYRIGHT.md new file mode 100644 index 000000000..a059b0c49 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-llama3/COPYRIGHT.md @@ -0,0 +1,9 @@ +# Copyright Notice + +© 2024-2025 MLCommons. All Rights Reserved. + +This file is licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. A copy of the License can be obtained at: + +[Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) + +Unless required by applicable law or agreed to in writing, software distributed under the License is provided on an "AS IS" basis, without warranties or conditions of any kind, either express or implied. Please refer to the License for the specific language governing permissions and limitations under the License. 
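Reviewer note: with the `get-ml-model-huggingface-zoo` change above, a gated checkpoint clone now logs in through the `get,hf-cli,_with-login` dependency whenever `CM_HF_TOKEN` is set, before the git-lfs clone runs. A condensed sketch of that flow outside the CM cache machinery (the model stub is an example from this diff, and the real dependency chain also sets up git-lfs):

```python
# Simplified login-then-clone flow for a gated Hugging Face repository,
# mirroring the CM_HF_LOGIN_CMD built by get-huggingface-cli/customize.py.
import os
import subprocess

token = os.environ.get("CM_HF_TOKEN", "")
if token:
    subprocess.run(["git", "config", "--global",
                    "credential.helper", "store"], check=True)
    subprocess.run(["huggingface-cli", "login", "--token", token,
                    "--add-to-git-credential"], check=True)

stub = "meta-llama/Llama-3.1-8B-Instruct"  # example stub from this diff
subprocess.run(["git", "clone",
                f"https://huggingface.co/{stub}"], check=True)
```

Storing the credential first lets the subsequent plain `git clone` reuse the token without further prompting.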
diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-llama3/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-llama3/_cm.yaml new file mode 100644 index 000000000..376553823 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-llama3/_cm.yaml @@ -0,0 +1,68 @@ +alias: get-ml-model-llama3 +automation_alias: script +automation_uid: 5b4e0237da074764 +cache: true +category: AI/ML models +input_mapping: + outdirname: CM_OUTDIRNAME +new_env_keys: +- CM_ML_MODEL_* +- LLAMA3_CHECKPOINT_PATH +prehook_deps: +- enable_if_env: + CM_TMP_REQUIRE_DOWNLOAD: + - 'yes' + env: {} + extra_cache_tags: llama3,llama-3 + force_env_keys: + - CM_GIT_CHECKOUT_FOLDER + names: + - hf-zoo + tags: get,ml-model,huggingface,zoo,_clone-repo +print_env_at_the_end: + LLAMA3_CHECKPOINT_PATH: LLAMA3 checkpoint path +tags: +- get +- raw +- ml-model +- language-processing +- llama3 +- llama3-405b +uid: 2f8cef2acc334e80 +variations: + fp16: + default: true + env: + CM_ML_MODEL_INPUT_DATA_TYPES: fp16 + CM_ML_MODEL_PRECISION: fp16 + CM_ML_MODEL_WEIGHT_DATA_TYPES: fp16 + group: precision + meta-llama/Llama-3.1-405B-Instruct: + adr: + hf-zoo: + tags: _model-stub.meta-llama/Llama-3.1-405B-Instruct + default: true + env: + CM_ML_MODEL_NAME: Llama-3-405b-instruct + CM_MODEL_ZOO_ENV_KEY: LLAMA3 + group: huggingface-stub + meta-llama/Llama-3.1-8B-Instruct: + adr: + hf-zoo: + tags: _model-stub.meta-llama/Llama-3.1-8B-Instruct + env: + CM_ML_MODEL_NAME: Llama-3-8b-instruct + CM_MODEL_ZOO_ENV_KEY: LLAMA3 + group: huggingface-stub + vllm: + default: true + env: + CM_ML_MODEL_FRAMEWORK: vllm + group: framework + stub.#: + adr: + hf-zoo: + tags: _model-stub.# + env: + CM_MODEL_ZOO_ENV_KEY: LLAMA3 + group: huggingface-stub diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-llama3/customize.py b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-llama3/customize.py new file mode 100644 index 000000000..9ec7edecd --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-llama3/customize.py @@ -0,0 +1,35 @@ +from cmind import utils +import os + + +def preprocess(i): + + os_info = i['os_info'] + env = i['env'] + + # skip download and register in cache if the llama3 checkpoint path is + # already defined by the user + if env.get('CM_ML_MODEL_LLAMA3_CHECKPOINT_PATH', '') != '': + env['LLAMA3_CHECKPOINT_PATH'] = env['CM_ML_MODEL_LLAMA3_CHECKPOINT_PATH'] + return {'return': 0} + + path = env.get('CM_OUTDIRNAME', '').strip() + + if path != "": + os.makedirs(path, exist_ok=True) + env['CM_GIT_CHECKOUT_FOLDER'] = os.path.join( + path, env['CM_ML_MODEL_NAME']) + + env['CM_TMP_REQUIRE_DOWNLOAD'] = 'yes' + + return {'return': 0} + + +def postprocess(i): + + env = i['env'] + + env['CM_ML_MODEL_LLAMA3_CHECKPOINT_PATH'] = env['LLAMA3_CHECKPOINT_PATH'] + env['CM_GET_DEPENDENT_CACHED_PATH'] = env['CM_ML_MODEL_PATH'] + + return {'return': 0} diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-mixtral/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-mixtral/_cm.yaml index 2542d4dc7..358d56318 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-mixtral/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-mixtral/_cm.yaml @@ -6,6 +6,8 @@ category: AI/ML models env: CM_ML_MODEL_DATASET: '' CM_ML_MODEL_WEIGHT_TRANSFORMATIONS: 'no' +docker: + real_run: False input_mapping: checkpoint: MIXTRAL_CHECKPOINT_PATH new_env_keys: @@ -22,6 +24,8 @@ prehook_deps: names: - hf-zoo tags: get,ml-model,huggingface,zoo,_clone-repo + force_env_keys: + - CM_OUTDIRNAME print_env_at_the_end: MIXTRAL_CHECKPOINT_PATH: MIXTRAL checkpoint 
path tags: diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-retinanet-nvidia/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-retinanet-nvidia/_cm.yaml index 4e114e43d..7f6880322 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-retinanet-nvidia/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-retinanet-nvidia/_cm.yaml @@ -14,6 +14,8 @@ deps: - tags: get,mlperf,training,src,_nvidia-retinanet - tags: get,mlperf,inference,src - tags: get,ml-model,retinanet,_pytorch,_fp32,_weights + force_env_keys: + - CM_OUTDIRNAME - enable_if_env: CM_TORCH_DEVICE: cpu tags: get,generic-python-lib,_torch diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-retinanet/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-retinanet/_cm.yaml index 90e937000..8da05da0e 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-retinanet/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-retinanet/_cm.yaml @@ -28,6 +28,8 @@ prehook_deps: CM_TMP_ML_MODEL_RETINANET_NO_NMS: - 'yes' tags: download-and-extract + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL @@ -80,6 +82,8 @@ variations: extra_cache_tags: retinanet,training,patch,file force_cache: true tags: download,file,_url.https://raw.githubusercontent.com/arjunsuresh/ck-qaic/main/package/model-onnx-mlperf-retinanet-no-nms/remove-nms-and-extract-priors.patch + force_env_keys: + - CM_OUTDIRNAME - env: CM_GIT_CHECKOUT_PATH_ENV_NAME: CM_MLPERF_TRAINING_REPO_PATCHED_PATH CM_GIT_PATCH_FILEPATHS: <<>> diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-rgat/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-rgat/_cm.yaml index 0bc4b1eab..27a7e39e2 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-rgat/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-rgat/_cm.yaml @@ -3,15 +3,15 @@ automation_alias: script automation_uid: 5b4e0237da074764 cache: true category: AI/ML models +docker: + fake_run_deps: True env: CM_ML_MODEL: RGAT - CM_ML_MODEL_DATASET: ICBH input_mapping: checkpoint: RGAT_CHECKPOINT_PATH - download_path: CM_DOWNLOAD_PATH - to: CM_DOWNLOAD_PATH new_env_keys: - CM_ML_MODEL_* +- CM_ML_MODEL_RGAT_CHECKPOINT_PATH - RGAT_CHECKPOINT_PATH prehook_deps: - enable_if_env: @@ -20,12 +20,14 @@ prehook_deps: CM_TMP_REQUIRE_DOWNLOAD: - 'yes' env: - CM_DOWNLOAD_FINAL_ENV_NAME: CM_ML_MODEL_PATH - extra_cache_tags: rgat,gnn,model + CM_DOWNLOAD_FINAL_ENV_NAME: RGAT_DIR_PATH + extra_cache_tags: rgat,gnn,model,ml-model force_cache: true names: - - dae - tags: download-and-extract + - download-file + tags: download,file + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_DOWNLOAD_URL @@ -54,7 +56,7 @@ variations: group: download-source rclone: adr: - dae: + download-file: tags: _rclone env: CM_DOWNLOAD_TOOL: rclone @@ -62,4 +64,6 @@ variations: group: download-tool rclone,fp32: env: + CM_ML_MODEL_STARTING_WEIGHTS_FILENAME: https://github.com/mlcommons/inference/tree/master/graph/R-GAT#download-model-using-rclone CM_DOWNLOAD_URL: mlc-inference:mlcommons-inference-wg-public/R-GAT/RGAT.pt + CM_DOWNLOAD_FILENAME: RGAT diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-rgat/customize.py b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-rgat/customize.py index dbecb0d8a..99e6731ec 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-rgat/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-rgat/customize.py @@ -18,10 +18,17 @@ def preprocess(i): os_info = i['os_info'] env = i['env'] + download_dir = 
env.get('CM_OUTDIRNAME', '') + path = env.get('RGAT_CHECKPOINT_PATH', '').strip() if path == '' or not os.path.exists(path): - env['CM_TMP_REQUIRE_DOWNLOAD'] = 'yes' + if download_dir != '' and os.path.exists( + os.path.join(download_dir, "RGAT", "RGAT.pt")): + env['RGAT_CHECKPOINT_PATH'] = os.path.join( + download_dir, "RGAT", "RGAT.pt") + else: + env['CM_TMP_REQUIRE_DOWNLOAD'] = 'yes' return {'return': 0} @@ -32,10 +39,15 @@ def postprocess(i): if env.get('RGAT_CHECKPOINT_PATH', '') == '': env['RGAT_CHECKPOINT_PATH'] = os.path.join( - env['CM_ML_MODEL_PATH'], "RGAT.pt") - elif env.get('CM_ML_MODEL_PATH', '') == '': - env['CM_ML_MODEL_PATH'] = env['RGAT_CHECKPOINT_PATH'] + env['RGAT_DIR_PATH'], "RGAT.pt") + + if env.get('CM_ML_MODEL_RGAT_CHECKPOINT_PATH', '') == '': + env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] = env['RGAT_CHECKPOINT_PATH'] + + if env.get('CM_ML_MODEL_PATH', '') == '': + env['CM_ML_MODEL_PATH'] = env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] - env['CM_GET_DEPENDENT_CACHED_PATH'] = env['RGAT_CHECKPOINT_PATH'] + env['RGAT_CHECKPOINT_PATH'] = env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] + env['CM_GET_DEPENDENT_CACHED_PATH'] = env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] return {'return': 0} diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-stable-diffusion/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-stable-diffusion/_cm.yaml index b2326daff..ae9ee2757 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-stable-diffusion/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-stable-diffusion/_cm.yaml @@ -28,6 +28,8 @@ prehook_deps: names: - hf-zoo tags: get,ml-model,huggingface,zoo,_clone-repo,_model-stub.stabilityai/stable-diffusion-xl-base-1.0 + force_env_keys: + - CM_OUTDIRNAME - enable_if_env: CM_DOWNLOAD_TOOL: - rclone diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-tiny-resnet/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-tiny-resnet/_cm.yaml index 4f8406e29..791ecccee 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-tiny-resnet/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-tiny-resnet/_cm.yaml @@ -22,6 +22,8 @@ prehook_deps: env: CM_EXTRACT_EXTRACTED_FILENAME: <<>> tags: download-and-extract + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL diff --git a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/COPYRIGHT.md b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/COPYRIGHT.md new file mode 100644 index 000000000..a059b0c49 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/COPYRIGHT.md @@ -0,0 +1,9 @@ +# Copyright Notice + +© 2024-2025 MLCommons. All Rights Reserved. + +This file is licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. A copy of the License can be obtained at: + +[Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) + +Unless required by applicable law or agreed to in writing, software distributed under the License is provided on an "AS IS" basis, without warranties or conditions of any kind, either express or implied. Please refer to the License for the specific language governing permissions and limitations under the License. 
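Reviewer note: the `get-ml-model-rgat` change above makes the script reuse a checkpoint that already sits under `CM_OUTDIRNAME` instead of re-downloading it. A condensed, standalone sketch of that decision (env handling is simplified here; the real code mutates the CM `env` dict passed into `preprocess`):

```python
# Reuse RGAT/RGAT.pt under CM_OUTDIRNAME when it already exists;
# otherwise flag the checkpoint for download via the download,file dep.
import os

env = {
    "CM_OUTDIRNAME": os.environ.get("CM_OUTDIRNAME", ""),
    "RGAT_CHECKPOINT_PATH": os.environ.get("RGAT_CHECKPOINT_PATH", ""),
}

download_dir = env["CM_OUTDIRNAME"]
path = env["RGAT_CHECKPOINT_PATH"].strip()

if path == "" or not os.path.exists(path):
    candidate = os.path.join(download_dir, "RGAT", "RGAT.pt")
    if download_dir != "" and os.path.exists(candidate):
        env["RGAT_CHECKPOINT_PATH"] = candidate   # reuse earlier download
    else:
        env["CM_TMP_REQUIRE_DOWNLOAD"] = "yes"    # trigger the download dep

print(env.get("RGAT_CHECKPOINT_PATH") or "download required")
```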
diff --git a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/_cm.yaml new file mode 100644 index 000000000..cefe6da4c --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/_cm.yaml @@ -0,0 +1,39 @@ +alias: get-mlperf-automotive-scratch-space +automation_alias: script +automation_uid: 5b4e0237da074764 +cache: true +category: MLPerf benchmark support +deps: [] +docker: + run: false +input_description: {} +input_mapping: + scratch_path: CM_ABTF_SCRATCH_PATH +new_env_keys: +- CM_ABTF_SCRATCH_PATH +- CM_ABTF_SCRATCH_PATH_MODELS +- CM_ABTF_SCRATCH_PATH_DATASETS +- CM_ABTF_SCRATCH_VERSION +new_state_keys: [] +post_deps: [] +posthook_deps: [] +prehook_deps: [] +tags: +- get +- abtf +- inference +- scratch +- space +uid: c384b7604e5c47d5 +variations: + version.#: + env: + CM_ABTF_SCRATCH_VERSION: '#' + group: version + version.4_0: + default: true + env: + CM_ABTF_SCRATCH_VERSION: '4_0' + group: version +versions: {} + diff --git a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/customize.py b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/customize.py new file mode 100644 index 000000000..057acd2c4 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/customize.py @@ -0,0 +1,40 @@ +from cmind import utils +import os + + +def preprocess(i): + + os_info = i['os_info'] + + env = i['env'] + + meta = i['meta'] + + automation = i['automation'] + + quiet = (env.get('CM_QUIET', False) == 'yes') + + if env.get('CM_ABTF_SCRATCH_PATH', '') == '': + env['CM_ABTF_SCRATCH_PATH'] = os.getcwd() + + return {'return': 0} + + +def postprocess(i): + + env = i['env'] + + env['CM_ABTF_SCRATCH_PATH_MODELS'] = os.path.join( + env['CM_ABTF_SCRATCH_PATH'], "models") + env['CM_ABTF_SCRATCH_PATH_DATASETS'] = os.path.join( + env['CM_ABTF_SCRATCH_PATH'], "datasets") + + if not os.path.exists(env['CM_ABTF_SCRATCH_PATH_MODELS']): + os.makedirs(env['CM_ABTF_SCRATCH_PATH_MODELS']) + + if not os.path.exists(env['CM_ABTF_SCRATCH_PATH_DATASETS']): + os.makedirs(env['CM_ABTF_SCRATCH_PATH_DATASETS']) + + env['CM_GET_DEPENDENT_CACHED_PATH'] = env['CM_ABTF_SCRATCH_PATH'] + + return {'return': 0} diff --git a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/run.bat b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/run.bat new file mode 100644 index 000000000..648302ca7 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/run.bat @@ -0,0 +1 @@ +rem native script diff --git a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/run.sh b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/run.sh new file mode 100644 index 000000000..3a584c10c --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/run.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +#CM Script location: ${CM_TMP_CURRENT_SCRIPT_PATH} + +#To export any variable +#echo "VARIABLE_NAME=VARIABLE_VALUE" >>tmp-run-env.out + +#${CM_PYTHON_BIN_WITH_PATH} contains the path to python binary if "get,python" is added as a dependency + + + +function exit_if_error() { + test $? -eq 0 || exit $? +} + +function run() { + echo "Running: " + echo "$1" + echo "" + if [[ ${CM_FAKE_RUN} != 'yes' ]]; then + eval "$1" + exit_if_error + fi +} + +#Add your run commands here... 
+# run "$CM_RUN_CMD" diff --git a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-loadgen/run.sh b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-loadgen/run.sh index ac61ad329..47885f150 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-loadgen/run.sh +++ b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-loadgen/run.sh @@ -25,14 +25,14 @@ cmake \ -DCMAKE_INSTALL_PREFIX="${INSTALL_DIR}" \ "${CM_MLPERF_INFERENCE_SOURCE}/loadgen" \ -DPYTHON_EXECUTABLE:FILEPATH="${CM_PYTHON_BIN_WITH_PATH}" -B . -if [ ${?} -ne 0 ]; then exit $?; fi +test $? -eq 0 || exit $? echo "******************************************************" CM_MAKE_CORES=${CM_MAKE_CORES:-${CM_HOST_CPU_TOTAL_CORES}} CM_MAKE_CORES=${CM_MAKE_CORES:-2} cmake --build . --target install -j "${CM_MAKE_CORES}" -if [ ${?} -ne 0 ]; then exit $?; fi +test $? -eq 0 || exit $? # Clean build directory (too large) cd "${CUR_DIR}" @@ -43,8 +43,7 @@ fi cd "${CM_MLPERF_INFERENCE_SOURCE}/loadgen" ${CM_PYTHON_BIN_WITH_PATH} -m pip install . --target="${MLPERF_INFERENCE_PYTHON_SITE_BASE}" - -if [ ${?} -ne 0 ]; then exit $?; fi +test $? -eq 0 || exit $? # Clean the built wheel #find . -name 'mlcommons_loadgen*.whl' | xargs rm diff --git a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-src/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-src/_cm.yaml index e19e65378..a9f7410a5 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-src/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-src/_cm.yaml @@ -24,6 +24,7 @@ new_env_keys: - CM_MLPERF_INFERENCE_DLRM_V2_PATH - CM_MLPERF_INFERENCE_GPTJ_PATH - CM_MLPERF_INFERENCE_RNNT_PATH +- CM_MLPERF_INFERENCE_RGAT_PATH - CM_MLPERF_INFERENCE_SOURCE - CM_MLPERF_INFERENCE_SOURCE_VERSION - CM_MLPERF_INFERENCE_VERSION @@ -49,8 +50,6 @@ prehook_deps: _submodules.: - CM_GIT_SUBMODULES print_env_at_the_end_disabled: - CM_MLPERF_INFERENCE_CONF_PATH: Path to the MLPerf inference benchmark configuration - file CM_MLPERF_INFERENCE_SOURCE: Path to MLPerf inference benchmark sources tags: - get @@ -134,38 +133,55 @@ variations: versions: custom: env: - CM_MLPERF_LAST_RELEASE: v4.1 + CM_MLPERF_LAST_RELEASE: v5.0 deepsparse: env: - CM_MLPERF_LAST_RELEASE: v4.1 + CM_MLPERF_LAST_RELEASE: v5.0 CM_TMP_GIT_CHECKOUT: deepsparse CM_TMP_GIT_URL: https://github.com/neuralmagic/inference main: env: - CM_MLPERF_LAST_RELEASE: v4.1 + CM_MLPERF_LAST_RELEASE: v5.0 CM_TMP_GIT_CHECKOUT: main master: env: - CM_MLPERF_LAST_RELEASE: v4.1 + CM_MLPERF_LAST_RELEASE: v5.0 CM_TMP_GIT_CHECKOUT: master r2.1: env: CM_MLPERF_LAST_RELEASE: v2.1 CM_TMP_GIT_CHECKOUT: v2.1 r3.0: - adr: + ad: inference-git-repo: tags: _tag.v3.0 env: CM_MLPERF_LAST_RELEASE: v3.0 CM_TMP_GIT_CHECKOUT: '' r3.1: - adr: + ad: inference-git-repo: tags: _tag.v3.1 env: CM_MLPERF_LAST_RELEASE: v3.1 - CM_TMP_GIT_CHECKOUT: '' + CM_GIT_CHECKOUT_TAG: 'v3.1' + r4.0: + ad: + inference-git-repo: + tags: _tag.v4.0 + env: + CM_MLPERF_LAST_RELEASE: v4.0 + CM_GIT_CHECKOUT_TAG: 'v4.0' + r4.1: + ad: + inference-git-repo: + tags: _tag.v4.1 + env: + CM_MLPERF_LAST_RELEASE: v4.1 + CM_GIT_CHECKOUT_TAG: 'v4.1' + r5.0: + env: + CM_MLPERF_LAST_RELEASE: v5.0 tvm: env: CM_MLPERF_LAST_RELEASE: v3.1 diff --git a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-src/customize.py b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-src/customize.py index 0f6f10e2b..4076ebe00 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-src/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-src/customize.py 
@@ -52,7 +52,8 @@ def preprocess(i): # if not try to assign the values specified in version parameters, # if version parameters does not have the value to a parameter, set the # default one - if env.get('CM_GIT_CHECKOUT', '') == '': + if env.get('CM_GIT_CHECKOUT', '') == '' and env.get( + 'CM_GIT_CHECKOUT_TAG', '') == '': if env.get('CM_TMP_GIT_CHECKOUT', '') != '': env["CM_GIT_CHECKOUT"] = env["CM_TMP_GIT_CHECKOUT"] else: @@ -65,7 +66,7 @@ def preprocess(i): env["CM_GIT_URL"] = "https://github.com/mlcommons/inference" if env.get("CM_MLPERF_LAST_RELEASE", '') == '': - env["CM_MLPERF_LAST_RELEASE"] = "v4.1" + env["CM_MLPERF_LAST_RELEASE"] = "v5.0" if 'CM_GIT_DEPTH' not in env: env['CM_GIT_DEPTH'] = '' @@ -120,6 +121,8 @@ def postprocess(i): inference_root, 'recommendation', 'dlrm') env['CM_MLPERF_INFERENCE_DLRM_V2_PATH'] = os.path.join( inference_root, 'recommendation', 'dlrm_v2') + env['CM_MLPERF_INFERENCE_RGAT_PATH'] = os.path.join( + inference_root, 'graph', 'R-GAT') env['CM_MLPERF_INFERENCE_3DUNET_PATH'] = os.path.join( inference_root, 'vision', 'medical_imaging', '3d-unet-kits19') diff --git a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-sut-configs/configs/RTX4090x1/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-sut-configs/configs/RTX4090x1/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml index a9ad05a50..d764ab24d 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-sut-configs/configs/RTX4090x1/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-sut-configs/configs/RTX4090x1/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml @@ -28,9 +28,8 @@ retinanet: target_qps: 850.0 Server: target_qps: 630.0 -sdxl: +stable-diffusion-xl: Offline: target_qps: 0.7 Server: - target_qps: 0.3 - + target_qps: 0.3 diff --git a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-sut-configs/configs/RTX4090x2/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-sut-configs/configs/RTX4090x2/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml index 4820e8b52..294b2eda7 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-sut-configs/configs/RTX4090x2/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-sut-configs/configs/RTX4090x2/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml @@ -35,7 +35,7 @@ target_qps: 8 Server: target_qps: 7 - sdxl: + stable-diffusion-xl: Offline: target_qps: 1.3 Server: diff --git a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-utils/customize.py b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-utils/customize.py index ec9fe4ddb..efbd039c8 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-utils/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-utils/customize.py @@ -26,7 +26,7 @@ def preprocess(i): quiet = (env.get('CM_QUIET', False) == 'yes') - utils_path = i['run_script_input']['path'] + utils_path = 
env['CM_TMP_CURRENT_SCRIPT_PATH'] env['+PYTHONPATH'] = [utils_path] diff --git a/cmx4mlops/cmx4mlops/repo/script/get-tensorrt/customize.py b/cmx4mlops/cmx4mlops/repo/script/get-tensorrt/customize.py index 7f0bbe977..f3eb69a54 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-tensorrt/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/get-tensorrt/customize.py @@ -24,7 +24,7 @@ def preprocess(i): # Not enforcing dev requirement for now if env.get('CM_TENSORRT_TAR_FILE_PATH', '') == '' and env.get( - 'CM_TENSORRT_REQUIRE_DEV1', '') != 'yes' and env.get('CM_HOST_PLATFORM_FLAVOR', '') != 'aarch64': + 'CM_TENSORRT_REQUIRE_DEV1', '') != 'yes' and env.get('CM_HOST_PLATFORM_FLAVOR_', '') != 'aarch64': if os_info['platform'] == 'windows': extra_pre = '' diff --git a/cmx4mlops/cmx4mlops/repo/script/install-python-src/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/install-python-src/_cm.yaml index c0a618346..5aeed2a6b 100644 --- a/cmx4mlops/cmx4mlops/repo/script/install-python-src/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/install-python-src/_cm.yaml @@ -16,6 +16,9 @@ deps: - tags: detect,cpu - tags: get,generic-sys-util,_libffi-dev - tags: get,generic-sys-util,_libbz2-dev + enable_if_env: + CM_HOST_OS_FLAVOR: + - ubuntu - tags: get,generic-sys-util,_libssl-dev - enable_if_env: CM_HOST_OS_FLAVOR: diff --git a/cmx4mlops/cmx4mlops/repo/script/preprocess-mlperf-inference-submission/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/preprocess-mlperf-inference-submission/_cm.yaml index 40ff0c669..eb5f959b4 100644 --- a/cmx4mlops/cmx4mlops/repo/script/preprocess-mlperf-inference-submission/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/preprocess-mlperf-inference-submission/_cm.yaml @@ -22,6 +22,7 @@ deps: input_mapping: input: CM_MLPERF_INFERENCE_SUBMISSION_DIR submission_dir: CM_MLPERF_INFERENCE_SUBMISSION_DIR + version: CM_MLPERF_SUBMISSION_CHECKER_VERSION submitter: CM_MLPERF_SUBMITTER tags: - run diff --git a/cmx4mlops/cmx4mlops/repo/script/preprocess-mlperf-inference-submission/customize.py b/cmx4mlops/cmx4mlops/repo/script/preprocess-mlperf-inference-submission/customize.py index 28ceaf7f8..c8c43e295 100644 --- a/cmx4mlops/cmx4mlops/repo/script/preprocess-mlperf-inference-submission/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/preprocess-mlperf-inference-submission/customize.py @@ -40,8 +40,11 @@ def preprocess(i): print(f"Cleaning {submission_processed}") shutil.rmtree(submission_processed) + version = env.get('CM_MLPERF_SUBMISSION_CHECKER_VERSION', '') + x_version = ' --version ' + version + ' ' if version != '' else '' + CMD = env['CM_PYTHON_BIN'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "tools", "submission", - "preprocess_submission.py") + "' --input '" + submission_dir + "' --submitter '" + submitter + "' --output '" + submission_processed + "'" + "preprocess_submission.py") + "' --input '" + submission_dir + "' --submitter '" + submitter + "' --output '" + submission_processed + "'" + x_version env['CM_RUN_CMD'] = CMD return {'return': 0} diff --git a/cmx4mlops/cmx4mlops/repo/script/process-mlperf-accuracy/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/process-mlperf-accuracy/_cm.yaml index f6d9acd5e..3b80194d4 100644 --- a/cmx4mlops/cmx4mlops/repo/script/process-mlperf-accuracy/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/process-mlperf-accuracy/_cm.yaml @@ -261,3 +261,11 @@ variations: env: CM_DATASET: terabyte group: dataset + igbh: + env: + CM_DATASET: igbh + group: dataset + dataset_llama3: + env: + CM_DATASET: dataset_llama3 + group: dataset diff --git 
a/cmx4mlops/cmx4mlops/repo/script/process-mlperf-accuracy/customize.py b/cmx4mlops/cmx4mlops/repo/script/process-mlperf-accuracy/customize.py index 1e4363da6..ba41d02c9 100644 --- a/cmx4mlops/cmx4mlops/repo/script/process-mlperf-accuracy/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/process-mlperf-accuracy/customize.py @@ -129,18 +129,30 @@ def preprocess(i): extra_options = "" if env.get('CM_SDXL_STATISTICS_FILE_PATH', '') != '': - extra_options += f" --statistics-path '{env['CM_SDXL_STATISTICS_FILE_PATH']}' " + extra_options += ( + f""" --statistics-path '{ + env['CM_SDXL_STATISTICS_FILE_PATH']}'""" + ) if env.get('CM_SDXL_COMPLIANCE_IMAGES_PATH', '') != '': - extra_options += f" --compliance-images-path '{env['CM_SDXL_COMPLIANCE_IMAGES_PATH']}' " + extra_options += ( + f""" --compliance-images-path '{ + env['CM_SDXL_COMPLIANCE_IMAGES_PATH']}' """ + ) else: - extra_options += f""" --compliance-images-path '{os.path.join(result_dir, "images")}' """ + extra_options += f""" --compliance-images-path '{ + os.path.join( + result_dir, "images")}' """ if env.get('CM_COCO2014_SAMPLE_ID_PATH', '') != '': - extra_options += f" --ids-path '{env['CM_COCO2014_SAMPLE_ID_PATH']}' " + extra_options += ( + f" --ids-path '{env['CM_COCO2014_SAMPLE_ID_PATH']}' " + ) if env.get('CM_SDXL_ACCURACY_RUN_DEVICE', '') != '': - extra_options += f" --device '{env['CM_SDXL_ACCURACY_RUN_DEVICE']}' " + extra_options += ( + f" --device '{env['CM_SDXL_ACCURACY_RUN_DEVICE']}' " + ) # env['DATASET_ANNOTATIONS_FILE_PATH'] = env['CM_DATASET_ANNOTATIONS_FILE_PATH'] CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "text_to_image", "tools", @@ -173,15 +185,35 @@ def preprocess(i): elif dataset == "terabyte": extra_options = "" if env.get('CM_DLRM_V2_AGGREGATION_TRACE_FILE_PATH', '') != '': - extra_options += f" --aggregation-trace-file '{env['CM_DLRM_V2_AGGREGATION_TRACE_FILE_PATH']}' " + extra_options += ( + f""" --aggregation-trace-file '{ + env['CM_DLRM_V2_AGGREGATION_TRACE_FILE_PATH']}' """ + ) if env.get('CM_DLRM_V2_DAY23_FILE_PATH', '') != '': - extra_options += f" --day-23-file '{env['CM_DLRM_V2_DAY23_FILE_PATH']}' " + extra_options += ( + f""" --day-23-file '{ + env['CM_DLRM_V2_DAY23_FILE_PATH']}' """ + ) CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_DLRM_V2_PATH'], "pytorch", "tools", "accuracy-dlrm.py") + "' --mlperf-accuracy-file '" + os.path.join(result_dir, "mlperf_log_accuracy.json") + "'" + extra_options + \ " --dtype " + env.get('CM_ACCURACY_DTYPE', "float32") + " > '" + out_file + "'" + elif dataset == "igbh": + if env.get('CM_DATASET_IGBH_SIZE', '') == '': + if env.get('CM_MLPERF_SUBMISSION_GENERATION_STYLE', + '') == "full": + env['CM_DATASET_IGBH_SIZE'] = "full" + else: + env['CM_DATASET_IGBH_SIZE'] = "tiny" + CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "graph", "R-GAT", "tools", "accuracy_igbh.py") + "' --mlperf-accuracy-file '" + os.path.join( + result_dir, "mlperf_log_accuracy.json") + "' --dataset-path '" + env['CM_DATASET_IGBH_PATH'] + "' --dataset-size '" + env['CM_DATASET_IGBH_SIZE'] + "' --output-file '" + out_file + "'" + + elif dataset == "dataset_llama3": + CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "language", "llama3.1-405b", "evaluate-accuracy.py") + "' --checkpoint-path '" + env['CM_ML_MODEL_LLAMA3_CHECKPOINT_PATH'] + "' --mlperf-accuracy-file '" + os.path.join( + result_dir, "mlperf_log_accuracy.json") + "' 
--dtype '" + env['CM_ACCURACY_DTYPE'] + "' --dataset-file '" + env['CM_DATASET_LLAMA3_PATH'] + "' > '" + out_file + "'" + else: return {'return': 1, 'error': 'Unsupported dataset'} diff --git a/cmx4mlops/cmx4mlops/repo/script/pull-git-repo/customize.py b/cmx4mlops/cmx4mlops/repo/script/pull-git-repo/customize.py index 7f7633ec2..1e91c785b 100644 --- a/cmx4mlops/cmx4mlops/repo/script/pull-git-repo/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/pull-git-repo/customize.py @@ -18,9 +18,6 @@ def preprocess(i): os_info = i['os_info'] - if os_info['platform'] == 'windows': - return {'return': 1, 'error': 'Windows is not supported in this script yet'} - env = i['env'] meta = i['meta'] diff --git a/cmx4mlops/cmx4mlops/repo/script/pull-git-repo/run.bat b/cmx4mlops/cmx4mlops/repo/script/pull-git-repo/run.bat new file mode 100644 index 000000000..8642fce0e --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/pull-git-repo/run.bat @@ -0,0 +1,26 @@ +@echo off +setlocal enabledelayedexpansion + +REM Save the current directory +set "CUR_DIR=%CD%" +set "SCRIPT_DIR=%CM_TMP_CURRENT_SCRIPT_PATH%" + +REM Change to the specified path +set "path=%CM_GIT_CHECKOUT_PATH%" +echo cd %path% + +cd /d "%path%" +if errorlevel 1 ( + echo Failed to change directory to %path% + exit /b %errorlevel% +) + +REM Execute the Git pull command +echo %CM_GIT_PULL_CMD% +call %CM_GIT_PULL_CMD% +REM Don't fail if there are local changes +REM if errorlevel 1 exit /b %errorlevel% + +REM Return to the original directory +cd /d "%CUR_DIR%" +endlocal diff --git a/cmx4mlops/cmx4mlops/repo/script/push-mlperf-inference-results-to-github/customize.py b/cmx4mlops/cmx4mlops/repo/script/push-mlperf-inference-results-to-github/customize.py index 0ea2d2ce8..3d52964a0 100644 --- a/cmx4mlops/cmx4mlops/repo/script/push-mlperf-inference-results-to-github/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/push-mlperf-inference-results-to-github/customize.py @@ -12,6 +12,7 @@ from cmind import utils import cmind as cm import os +from giturlparse import parse def preprocess(i): @@ -43,6 +44,13 @@ def preprocess(i): env['CM_MLPERF_RESULTS_REPO_COMMIT_MESSAGE'] = env.get( 'CM_MLPERF_RESULTS_REPO_COMMIT_MESSAGE', 'Added new results') + p = parse(repo) + if env.get('CM_GITHUB_PAT', '') != '': + token = env['CM_GITHUB_PAT'] + env['CM_GIT_PUSH_CMD'] = f"""git push https://x-access-token:{env['CM_GITHUB_PAT']}@{p.host}/{p.owner}/{p.repo}""" + else: + env['CM_GIT_PUSH_CMD'] = "git push" + return {'return': 0} diff --git a/cmx4mlops/cmx4mlops/repo/script/push-mlperf-inference-results-to-github/run.bat b/cmx4mlops/cmx4mlops/repo/script/push-mlperf-inference-results-to-github/run.bat index 2052eb564..085727d19 100644 --- a/cmx4mlops/cmx4mlops/repo/script/push-mlperf-inference-results-to-github/run.bat +++ b/cmx4mlops/cmx4mlops/repo/script/push-mlperf-inference-results-to-github/run.bat @@ -25,7 +25,11 @@ REM Check if the previous command was successful if %errorlevel% neq 0 exit /b %errorlevel% git commit -a -m "%CM_MLPERF_RESULTS_REPO_COMMIT_MESSAGE%" -git push + +if defined CM_MLPERF_INFERENCE_SUBMISSION_DIR call %CM_SET_REMOTE_URL_CMD% + +echo "%CM_GIT_PUSH_CMD%" +%CM_GIT_PUSH_CMD% REM Check if the previous command was successful if %errorlevel% neq 0 exit /b %errorlevel% diff --git a/cmx4mlops/cmx4mlops/repo/script/push-mlperf-inference-results-to-github/run.sh b/cmx4mlops/cmx4mlops/repo/script/push-mlperf-inference-results-to-github/run.sh index 1eb4f663e..8b6ac5648 100644 --- a/cmx4mlops/cmx4mlops/repo/script/push-mlperf-inference-results-to-github/run.sh 
+++ b/cmx4mlops/cmx4mlops/repo/script/push-mlperf-inference-results-to-github/run.sh @@ -16,5 +16,8 @@ fi test $? -eq 0 || exit $? git commit -a -m "${CM_MLPERF_RESULTS_REPO_COMMIT_MESSAGE}" -git push + +echo ${CM_GIT_PUSH_CMD} +${CM_GIT_PUSH_CMD} + test $? -eq 0 || exit $? diff --git a/cmx4mlops/cmx4mlops/repo/script/reproduce-mlperf-inference-dummy/COPYRIGHT.md b/cmx4mlops/cmx4mlops/repo/script/reproduce-mlperf-inference-dummy/COPYRIGHT.md new file mode 100644 index 000000000..696f82922 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/reproduce-mlperf-inference-dummy/COPYRIGHT.md @@ -0,0 +1,3 @@ +© 2022-2025 MLCommons. All Rights Reserved. + +Grigori Fursin, the cTuning foundation and OctoML donated the CK and CM projects to MLCommons to benefit everyone. diff --git a/cmx4mlops/cmx4mlops/repo/script/run-docker-container/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/run-docker-container/_cm.yaml index 5135070b9..3bc5ac184 100644 --- a/cmx4mlops/cmx4mlops/repo/script/run-docker-container/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/run-docker-container/_cm.yaml @@ -17,6 +17,7 @@ default_env: CM_DOCKER_DETACHED_MODE: 'yes' CM_DOCKER_REUSE_EXISTING_CONTAINER: 'no' CM_DOCKER_PRIVILEGED_MODE: 'no' + CM_PODMAN_MAP_USER_ID: 'no' input_mapping: all_gpus: CM_DOCKER_ADD_ALL_GPUS @@ -57,6 +58,9 @@ input_mapping: new_env_keys: - 'CM_DOCKER_CONTAINER_ID' +deps: + - tags: get,docker + prehook_deps: - names: - build-docker-image @@ -68,3 +72,4 @@ prehook_deps: CM_DOCKER_CONTAINER_ID: - on tags: build,docker,image +- tags: get,docker diff --git a/cmx4mlops/cmx4mlops/repo/script/run-docker-container/customize.py b/cmx4mlops/cmx4mlops/repo/script/run-docker-container/customize.py index 6a0ce7ce5..73d603235 100644 --- a/cmx4mlops/cmx4mlops/repo/script/run-docker-container/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/run-docker-container/customize.py @@ -14,6 +14,7 @@ import os import subprocess from os.path import exists +import json def preprocess(i): @@ -62,7 +63,7 @@ def preprocess(i): print('') print('Checking existing Docker container:') print('') - CMD = f"""docker ps --filter "ancestor={DOCKER_CONTAINER}" """ + CMD = f"""{env['CM_CONTAINER_TOOL']} ps --format=json --filter "ancestor={DOCKER_CONTAINER}" """ if os_info['platform'] == 'windows': CMD += " 2> nul" else: @@ -71,17 +72,31 @@ def preprocess(i): print('') try: - docker_container = subprocess.check_output( - CMD, shell=True).decode("utf-8") + out = subprocess.check_output( + CMD, shell=True, text=True).strip() except Exception as e: return { - 'return': 1, 'error': 'Docker is either not installed or not started:\n{}'.format(e)} + 'return': 1, + 'error': 'Unexpected error occurred with docker run:\n{}'.format(e) + } + + if len(out) > 0 and str(env.get('CM_DOCKER_REUSE_EXISTING_CONTAINER', + '')).lower() in ["1", "true", "yes"]: # container exists + # print(out) + out_split = out.splitlines() + if len(out_split) > 0: + try: + out_json = json.loads(out_split[0]) + # print("JSON successfully loaded:", out_json) + except json.JSONDecodeError as e: + print(f"Error: First line of 'out' is not valid JSON: {e}") + return { + 'return': 1, 'error': f"Error: First line of 'out' is not valid JSON: {e}"} + else: + out_json = [] - output_split = docker_container.split("\n") - if len(output_split) > 1 and str(env.get('CM_DOCKER_REUSE_EXISTING_CONTAINER', - '')).lower() in ["1", "true", "yes"]: # container exists - out = output_split[1].split(" ") - existing_container_id = out[0] + if isinstance(out_json, list) and len(out_json) > 0: + 
existing_container_id = out_json[0]['Id'] print(f"Reusing existing container {existing_container_id}") env['CM_DOCKER_CONTAINER_ID'] = existing_container_id @@ -89,7 +104,7 @@ def preprocess(i): if env.get('CM_DOCKER_CONTAINER_ID', '') != '': del (env['CM_DOCKER_CONTAINER_ID']) # not valid ID - CMD = "docker images -q " + DOCKER_CONTAINER + CMD = f"""{env['CM_CONTAINER_TOOL']} images -q """ + DOCKER_CONTAINER if os_info['platform'] == 'windows': CMD += " 2> nul" @@ -178,6 +193,10 @@ def postprocess(i): if env.get('CM_DOCKER_EXTRA_RUN_ARGS', '') != '': run_opts += env['CM_DOCKER_EXTRA_RUN_ARGS'] + if env.get('CM_CONTAINER_TOOL', '') == 'podman' and env.get( + 'CM_PODMAN_MAP_USER_ID', '').lower() not in ["no", "0", "false"]: + run_opts += " --userns=keep-id" + if env.get('CM_DOCKER_PORT_MAPS', []): for ports in env['CM_DOCKER_PORT_MAPS']: port_map_cmds.append(ports) @@ -203,11 +222,6 @@ def postprocess(i): return {'return': 1, 'error': 'Can\'t find separator : in a mount string: {}'.format( mount_cmd)} -# mount_parts = mount_cmd.split(":") -# if len(mount_parts) != 2: -# return {'return': 1, 'error': 'Invalid mount {} -# specified'.format(mount_parts)} - host_mount = mount_parts[0] if not os.path.exists(host_mount): @@ -247,14 +261,14 @@ def postprocess(i): existing_container_id = env.get('CM_DOCKER_CONTAINER_ID', '') if existing_container_id: - CMD = f"ID={existing_container_id} && docker exec $ID bash -c '" + run_cmd + "'" + CMD = f"""ID={existing_container_id} && {env['CM_CONTAINER_TOOL']} exec $ID bash -c '""" + run_cmd + "'" else: - CONTAINER = f"docker run -dt {run_opts} --rm {docker_image_repo}/{docker_image_name}:{docker_image_tag} bash" - CMD = f"ID=`{CONTAINER}` && docker exec $ID bash -c '{run_cmd}'" + CONTAINER = f"""{env['CM_CONTAINER_TOOL']} run -dt {run_opts} --rm {docker_image_repo}/{docker_image_name}:{docker_image_tag} bash""" + CMD = f"""ID=`{CONTAINER}` && {env['CM_CONTAINER_TOOL']} exec $ID bash -c '{run_cmd}'""" if False and str(env.get('CM_KEEP_DETACHED_CONTAINER', '')).lower() not in [ 'yes', "1", 'true']: - CMD += " && docker kill $ID >/dev/null" + CMD += f""" && {env['CM_CONTAINER_TOOL']} kill $ID >/dev/null""" CMD += ' && echo "ID=$ID"' @@ -263,7 +277,10 @@ def postprocess(i): print('') print(CMD) print('') - print("Running " + run_cmd + " inside docker container") + print( + "Running " + + run_cmd + + f""" inside {env['CM_CONTAINER_TOOL']} container""") record_script({'cmd': CMD, 'env': env}) @@ -287,7 +304,8 @@ def postprocess(i): docker_out = result.stdout # if docker_out != 0: - # return {'return': docker_out, 'error': 'docker run failed'} + # return {'return': docker_out, 'error': f""{env['CM_CONTAINER_TOOL']} + # run failed""} lines = docker_out.split("\n") @@ -311,7 +329,7 @@ def postprocess(i): x1 = '-it' x2 = " && bash ) || bash" - CONTAINER = "docker run " + x1 + " --entrypoint " + x + x + " " + run_opts + \ + CONTAINER = f"{env['CM_CONTAINER_TOOL']} run " + x1 + " --entrypoint " + x + x + " " + run_opts + \ " " + docker_image_repo + "/" + docker_image_name + ":" + docker_image_tag CMD = CONTAINER + " bash -c " + x + run_cmd_prefix + run_cmd + x2 + x @@ -325,7 +343,10 @@ def postprocess(i): print('') docker_out = os.system(CMD) if docker_out != 0: - return {'return': docker_out, 'error': 'docker run failed'} + if docker_out % 256 == 0: + docker_out = 1 + return {'return': docker_out, + 'error': f"""{env['CM_CONTAINER_TOOL']} run failed"""} return {'return': 0} @@ -360,7 +381,7 @@ def record_script(i): def update_docker_info(env): # Updating Docker info - 
docker_image_repo = env.get('CM_DOCKER_IMAGE_REPO', 'local') + docker_image_repo = env.get('CM_DOCKER_IMAGE_REPO', 'localhost/local') env['CM_DOCKER_IMAGE_REPO'] = docker_image_repo docker_image_base = env.get('CM_DOCKER_IMAGE_BASE') diff --git a/cmx4mlops/cmx4mlops/repo/script/run-mlperf-automotive-app/COPYRIGHT.md b/cmx4mlops/cmx4mlops/repo/script/run-mlperf-automotive-app/COPYRIGHT.md new file mode 100644 index 000000000..a059b0c49 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/run-mlperf-automotive-app/COPYRIGHT.md @@ -0,0 +1,9 @@ +# Copyright Notice + +© 2024-2025 MLCommons. All Rights Reserved. + +This file is licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. A copy of the License can be obtained at: + +[Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) + +Unless required by applicable law or agreed to in writing, software distributed under the License is provided on an "AS IS" basis, without warranties or conditions of any kind, either express or implied. Please refer to the License for the specific language governing permissions and limitations under the License. diff --git a/cmx4mlops/cmx4mlops/repo/script/run-mlperf-automotive-app/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/run-mlperf-automotive-app/_cm.yaml new file mode 100644 index 000000000..942f499e8 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/run-mlperf-automotive-app/_cm.yaml @@ -0,0 +1,248 @@ +alias: run-mlperf-automotive-app +uid: 2a7315d2dff74898 + +automation_alias: script +automation_uid: 5b4e0237da074764 + +category: Modular MLPerf inference benchmark pipeline + +developers: "[Arjun Suresh](https://www.linkedin.com/in/arjunsuresh), [Grigori Fursin](https://cKnowledge.org/gfursin)" + + +clean_output_files: +- open.tar.gz +- summary.csv +- summary.json + +tags: +- run +- run-abtf +- run-abtf-inference +- mlcommons +- inference +- reference + +tags_help: "run-abtf,inference" + +default_env: + CM_MLPERF_IMPLEMENTATION: reference + CM_MLPERF_MODEL: retinanet + CM_MLPERF_RUN_STYLE: test + +input_mapping: + backend: CM_MLPERF_BACKEND + clean: CM_MLPERF_CLEAN_ALL + compliance: CM_MLPERF_LOADGEN_COMPLIANCE + dashboard_wb_project: CM_MLPERF_DASHBOARD_WANDB_PROJECT + dashboard_wb_user: CM_MLPERF_DASHBOARD_WANDB_USER + debug: CM_DEBUG_SCRIPT_BENCHMARK_PROGRAM + device: CM_MLPERF_DEVICE + division: CM_MLPERF_SUBMISSION_DIVISION + docker: CM_MLPERF_USE_DOCKER + dump_version_info: CM_DUMP_VERSION_INFO + save_console_log: CM_SAVE_CONSOLE_LOG + execution_mode: CM_MLPERF_RUN_STYLE + find_performance: CM_MLPERF_FIND_PERFORMANCE_MODE + gh_token: CM_GH_TOKEN + gpu_name: CM_NVIDIA_GPU_NAME + hw_name: CM_HW_NAME + hw_notes_extra: CM_MLPERF_SUT_SW_NOTES_EXTRA + imagenet_path: IMAGENET_PATH + implementation: CM_MLPERF_IMPLEMENTATION + lang: CM_MLPERF_IMPLEMENTATION + mode: CM_MLPERF_LOADGEN_MODE + model: CM_MLPERF_MODEL + multistream_target_latency: CM_MLPERF_LOADGEN_MULTISTREAM_TARGET_LATENCY + offline_target_qps: CM_MLPERF_LOADGEN_OFFLINE_TARGET_QPS + output_dir: OUTPUT_BASE_DIR + output_summary: MLPERF_INFERENCE_SUBMISSION_SUMMARY + output_tar: MLPERF_INFERENCE_SUBMISSION_TAR_FILE + performance_sample_count: CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT + power: CM_SYSTEM_POWER + precision: CM_MLPERF_MODEL_PRECISION + preprocess_submission: CM_RUN_MLPERF_SUBMISSION_PREPROCESSOR + push_to_github: CM_MLPERF_RESULT_PUSH_TO_GITHUB + readme: CM_MLPERF_README + regenerate_accuracy_file: CM_MLPERF_REGENERATE_ACCURACY_FILE + 
regenerate_files: CM_REGENERATE_MEASURE_FILES + rerun: CM_RERUN + results_dir: OUTPUT_BASE_DIR + results_git_url: CM_MLPERF_RESULTS_GIT_REPO_URL + run_checker: CM_RUN_SUBMISSION_CHECKER + run_style: CM_MLPERF_RUN_STYLE + scenario: CM_MLPERF_LOADGEN_SCENARIO + server_target_qps: CM_MLPERF_LOADGEN_SERVER_TARGET_QPS + singlestream_target_latency: CM_MLPERF_LOADGEN_SINGLESTREAM_TARGET_LATENCY + skip_submission_generation: CM_MLPERF_SKIP_SUBMISSION_GENERATION + skip_truncation: CM_SKIP_TRUNCATE_ACCURACY + submission_dir: CM_MLPERF_INFERENCE_SUBMISSION_DIR + submitter: CM_MLPERF_SUBMITTER + sut_servers: CM_NETWORK_LOADGEN_SUT_SERVERS + sw_notes_extra: CM_MLPERF_SUT_SW_NOTES_EXTRA + system_type: CM_MLPERF_SUBMISSION_SYSTEM_TYPE + target_latency: CM_MLPERF_LOADGEN_TARGET_LATENCY + target_qps: CM_MLPERF_LOADGEN_TARGET_QPS + test_query_count: CM_TEST_QUERY_COUNT + threads: CM_NUM_THREADS + batch_size: CM_MLPERF_LOADGEN_MAX_BATCHSIZE + sut: CM_MLPERF_INFERENCE_SUT_VARIATION + +new_state_keys: +- app_mlperf_inference_* +- cm-mlperf-inference-results* + +deps: +- tags: detect,os + skip_if_env: + CM_MLPERF_USE_DOCKER: [ on ] +- tags: detect,cpu + skip_if_env: + CM_MLPERF_USE_DOCKER: [ on ] +- names: + - python + - python3 + tags: get,python3 + skip_if_env: + CM_MLPERF_USE_DOCKER: [ on ] +- names: + - inference-src + tags: get,mlcommons,inference,src + skip_if_env: + CM_MLPERF_USE_DOCKER: [ on ] +- tags: get,sut,description + skip_if_env: + CM_MLPERF_USE_DOCKER: [ on ] + +- tags: get,mlperf,inference,results,dir + names: + - get-mlperf-inference-results-dir + enable_if_env: + CM_MLPERF_USE_DOCKER: [ off ] + skip_if_env: + OUTPUT_BASE_DIR: [ on ] +- tags: install,pip-package,for-cmind-python,_package.tabulate +- tags: get,mlperf,inference,utils + skip_if_env: + CM_MLPERF_USE_DOCKER: [ on ] + +variations: + accuracy-only: + default_variations: + submission-generation-style: full + env: + CM_MLPERF_LOADGEN_MODE: accuracy + CM_MLPERF_SUBMISSION_RUN: 'yes' + CM_RUN_MLPERF_ACCURACY: 'on' + CM_RUN_SUBMISSION_CHECKER: 'no' + group: submission-generation + + all-modes: + env: + CM_MLPERF_LOADGEN_ALL_MODES: 'yes' + group: mode + + all-scenarios: + env: + CM_MLPERF_LOADGEN_ALL_SCENARIOS: 'yes' + + compliance: + env: + CM_MLPERF_LOADGEN_COMPLIANCE: 'yes' + + dashboard: + default_gui: false + env: + CM_MLPERF_DASHBOARD: 'on' + + find-performance: + env: + CM_MLPERF_FIND_PERFORMANCE_MODE: 'yes' + CM_MLPERF_LOADGEN_ALL_MODES: 'no' + CM_MLPERF_LOADGEN_MODE: performance + CM_MLPERF_RESULT_PUSH_TO_GITHUB: false + group: submission-generation + + full: + add_deps_recursive: + coco2014-original: + tags: _full + coco2014-preprocessed: + tags: _full + env: + CM_MLPERF_SUBMISSION_GENERATION_STYLE: full + CM_MLPERF_SKIP_SUBMISSION_GENERATION: 'yes' + group: submission-generation-style + + performance-only: + default_variations: + submission-generation-style: full + env: + CM_MLPERF_LOADGEN_MODE: performance + CM_MLPERF_SUBMISSION_RUN: 'yes' + CM_RUN_SUBMISSION_CHECKER: 'no' + group: submission-generation + + mvp-demo: + default_env: + CM_MLPERF_DEVICE: cpu + + env: + CM_MLPERF_INFERENCE_VERSION: mvp-demo + CM_MLPERF_MODEL: abtf-demo-model + CM_MLPERF_BACKEND: pytorch + CM_MLPERF_IMPLEMENTATION: mlcommons-python + CM_MLPERF_LOADGEN_SCENARIO: SingleStream + CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: mvp-demo + adr: + compiler: + tags: gcc + group: benchmark-version + + poc-demo: + default_env: + CM_MLPERF_DEVICE: cpu + CM_TEST_QUERY_COUNT: "20" + + env: + CM_MLPERF_INFERENCE_VERSION: poc-demo + CM_MLPERF_MODEL: abtf-poc-model + 
CM_MLPERF_BACKEND: pytorch + CM_MLPERF_IMPLEMENTATION: mlcommons-python + CM_MLPERF_LOADGEN_SCENARIO: SingleStream + CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: poc-demo + adr: + compiler: + tags: gcc + group: benchmark-version + + performance-and-accuracy: + default: true + base: + - all-modes + default_variations: + submission-generation-style: full + group: submission-generation + + submission: + base: + - all-modes + default_gui: true + default_variations: + submission-generation-style: full + env: + CM_MLPERF_LOADGEN_COMPLIANCE: 'yes' + CM_MLPERF_SUBMISSION_RUN: 'yes' + CM_RUN_MLPERF_ACCURACY: 'on' + CM_RUN_SUBMISSION_CHECKER: 'yes' + CM_TAR_SUBMISSION_DIR: 'yes' + group: submission-generation + post_deps: + - names: + - submission-generator + enable_if_env: + CM_MLPERF_SKIP_SUBMISSION_GENERATION: + - 'no' + - 'false' + - 'False' + - '0' + tags: generate,mlperf,inference,submission diff --git a/cmx4mlops/cmx4mlops/repo/script/run-mlperf-automotive-app/customize.py b/cmx4mlops/cmx4mlops/repo/script/run-mlperf-automotive-app/customize.py new file mode 100644 index 000000000..14cb9c237 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/run-mlperf-automotive-app/customize.py @@ -0,0 +1,403 @@ +from cmind import utils +import os +import json +import shutil +import subprocess +import cmind as cm +import copy +from tabulate import tabulate + +summary_ext = ['.csv', '.json', '.xlsx'] + +########################################################################## + + +def preprocess(i): + + os_info = i['os_info'] + env = i['env'] + + inp = i['input'] + state = i['state'] + script_path = i['run_script_input']['path'] + + if env.get('CM_RUN_DOCKER_CONTAINER', '') == "yes": + return {'return': 0} + + dump_version_info = env.get('CM_DUMP_VERSION_INFO', True) + system_meta = state.get('CM_SUT_META', {}) + if system_meta: + env['CM_SUT_META_EXISTS'] = "yes" + + env['CM_MODEL'] = env['CM_MLPERF_MODEL'] + + # Clean MLPerf inference output tar file if non-standard + x = env.get('MLPERF_INFERENCE_SUBMISSION_TAR_FILE', '') + if x != '' and os.path.isfile(x): + os.remove(x) + + # Clean MLPerf inference submission summary files + x = env.get('MLPERF_INFERENCE_SUBMISSION_SUMMARY', '') + if x != '': + for y in summary_ext: + z = x + y + if os.path.isfile(z): + os.remove(z) + + if env.get('CM_MLPERF_SUBMISSION_SYSTEM_TYPE', '') != '': + system_type = env['CM_MLPERF_SUBMISSION_SYSTEM_TYPE'] + system_meta['system_type'] = system_type + + if env.get('CM_MLPERF_SUBMISSION_DIVISION', '') != '': + division = env['CM_MLPERF_SUBMISSION_DIVISION'] + system_meta['division'] = division + + if system_meta.get('division', '') != "closed": + # no compliance runs needed for open division + env["CM_MLPERF_LOADGEN_COMPLIANCE"] = "no" + + clean = False + + if 'CM_MLPERF_CLEAN_ALL' in env: + clean = True + if 'CM_MLPERF_CLEAN_SUBMISSION_DIR' not in env: + env['CM_MLPERF_CLEAN_SUBMISSION_DIR'] = "yes" + if 'CM_RERUN' not in env: + env['CM_RERUN'] = "yes" + + if str(env.get('CM_SYSTEM_POWER', 'no')).lower( + ) != "no" or env.get('CM_MLPERF_POWER', '') == "yes": + power_variation = ",_power" + env['CM_MLPERF_POWER'] = "yes" + else: + power_variation = "" + + if env.get('CM_RUN_STYLE', + '') == "valid" and 'CM_RUN_MLPERF_ACCURACY' not in env: + env['CM_RUN_MLPERF_ACCURACY'] = "on" + + if env.get('CM_MLPERF_INFERENCE_SOURCE', '') != '': + print( + "Using MLCommons Inference source from " + + env['CM_MLPERF_INFERENCE_SOURCE']) + + if 'CM_MLPERF_LOADGEN_EXTRA_OPTIONS' not in env: + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] = "" + + if 
'CM_MLPERF_LOADGEN_MODES' not in env: + if 'CM_MLPERF_LOADGEN_MODE' not in env: + env['CM_MLPERF_LOADGEN_MODE'] = "performance" + + if 'CM_MLPERF_LOADGEN_SCENARIOS' not in env: + if 'CM_MLPERF_LOADGEN_SCENARIO' not in env: + env['CM_MLPERF_LOADGEN_SCENARIO'] = "Offline" + + if env.get('CM_MLPERF_LOADGEN_ALL_SCENARIOS', '') == "yes": + env['CM_MLPERF_LOADGEN_SCENARIOS'] = get_valid_scenarios( + env['CM_MODEL'], + system_meta['system_type'], + env['CM_MLPERF_LAST_RELEASE'], + env['CM_MLPERF_INFERENCE_SOURCE']) + else: + system_meta = {} + env['CM_MLPERF_LOADGEN_SCENARIOS'] = [ + env['CM_MLPERF_LOADGEN_SCENARIO']] + + if env.get('CM_MLPERF_LOADGEN_ALL_MODES', '') == "yes": + env['CM_MLPERF_LOADGEN_MODES'] = ["performance", "accuracy"] + else: + env['CM_MLPERF_LOADGEN_MODES'] = [env['CM_MLPERF_LOADGEN_MODE']] + + if env.get('OUTPUT_BASE_DIR', '') == '': + env['OUTPUT_BASE_DIR'] = env.get( + 'CM_MLPERF_INFERENCE_RESULTS_DIR', os.getcwd()) + + test_list = [] + + variation_implementation = "_" + \ + env.get("CM_MLPERF_IMPLEMENTATION", "reference") + variation_model = ",_" + env["CM_MLPERF_MODEL"] + variation_backend = ",_" + \ + env["CM_MLPERF_BACKEND"] if env.get( + "CM_MLPERF_BACKEND", "") != "" else "" + variation_device = ",_" + \ + env["CM_MLPERF_DEVICE"] if env.get( + "CM_MLPERF_DEVICE", "") != "" else "" + variation_run_style = ",_" + env.get("CM_MLPERF_RUN_STYLE", "test") + variation_reproducibility = ",_" + env["CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS"] if env.get( + "CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS", "") != "" else "" + + if env.get("CM_MLPERF_MODEL_PRECISION", '') != '': + variation_quantization_string = ",_" + env["CM_MLPERF_MODEL_PRECISION"] + else: + variation_quantization_string = "" + + tags = "app,abtf-inference," + variation_implementation + variation_model + variation_backend + variation_device + \ + variation_run_style + variation_reproducibility + \ + variation_quantization_string + power_variation + verbose = inp.get('v', False) + print_env = inp.get('print_env', False) + print_deps = inp.get('print_deps', False) + add_deps_recursive = inp.get('add_deps_recursive', {}) + add_deps = inp.get('add_deps', {}) + ad = inp.get('ad', {}) + adr = inp.get('adr', {}) + adr_from_meta = i['run_script_input'].get('add_deps_recursive') + + for key in adr_from_meta: + add_deps_recursive[key] = adr_from_meta[key] + + if env.get('CM_MLPERF_LOADGEN_MAX_BATCHSIZE', '') != '': + if not add_deps_recursive.get('mlperf-inference-implementation', {}): + add_deps_recursive['mlperf-inference-implementation'] = {} + if add_deps_recursive['mlperf-inference-implementation'].get( + 'tags', '') == '': + add_deps_recursive['mlperf-inference-implementation']['tags'] = '' + else: + add_deps_recursive['mlperf-inference-implementation']['tags'] += ',' + add_deps_recursive['mlperf-inference-implementation']['tags'] += "_batch_size." 
+ \ + env['CM_MLPERF_LOADGEN_MAX_BATCHSIZE'] + + if env.get('CM_MLPERF_INFERENCE_SUT_VARIATION', '') != '': + if not add_deps_recursive.get('mlperf-inference-implementation', {}): + add_deps_recursive['mlperf-inference-implementation'] = {} + if add_deps_recursive['mlperf-inference-implementation'].get( + 'tags', '') == '': + add_deps_recursive['mlperf-inference-implementation']['tags'] = '' + else: + add_deps_recursive['mlperf-inference-implementation']['tags'] += ',' + add_deps_recursive['mlperf-inference-implementation']['tags'] += "_" + \ + env['CM_MLPERF_INFERENCE_SUT_VARIATION'] + + if env.get('CM_NETWORK_LOADGEN', '') != '': + if not add_deps_recursive.get('mlperf-inference-implementation', {}): + add_deps_recursive['mlperf-inference-implementation'] = {} + network_variation_tag = f"_network-{env['CM_NETWORK_LOADGEN']}" + if add_deps_recursive['mlperf-inference-implementation'].get( + 'tags', '') == '': + add_deps_recursive['mlperf-inference-implementation']['tags'] = '' + else: + add_deps_recursive['mlperf-inference-implementation']['tags'] += ',' + add_deps_recursive['mlperf-inference-implementation']['tags'] += network_variation_tag + + if env.get('CM_OUTPUT_FOLDER_NAME', '') == '': + env['CM_OUTPUT_FOLDER_NAME'] = env['CM_MLPERF_RUN_STYLE'] + "_results" + + output_dir = os.path.join( + env['OUTPUT_BASE_DIR'], + env['CM_OUTPUT_FOLDER_NAME']) + if clean: + path_to_clean = output_dir + + print('=========================================================') + print('Cleaning results in {}'.format(path_to_clean)) + if os.path.exists(path_to_clean): + shutil.rmtree(path_to_clean) + + print('=========================================================') + + if str(env.get('CM_MLPERF_USE_DOCKER', '') + ).lower() in ["1", "true", "yes"]: + action = "docker" + del (env['OUTPUT_BASE_DIR']) + state = {} + docker_extra_input = {} + + if env.get('CM_HW_NAME'): + del (env['CM_HW_NAME']) + + for k in inp: + if k.startswith("docker_"): + docker_extra_input[k] = inp[k] + inp = {} + else: + action = "run" + + # local_keys = [ 'CM_MLPERF_SKIP_RUN', 'CM_MLPERF_LOADGEN_QUERY_COUNT', + # 'CM_MLPERF_LOADGEN_TARGET_QPS', 'CM_MLPERF_LOADGEN_TARGET_LATENCY' ] + + for scenario in env['CM_MLPERF_LOADGEN_SCENARIOS']: + scenario_tags = tags + ",_" + scenario.lower() + env['CM_MLPERF_LOADGEN_SCENARIO'] = scenario + + if scenario == "Offline": + if env.get('CM_MLPERF_LOADGEN_OFFLINE_TARGET_QPS'): + env['CM_MLPERF_LOADGEN_TARGET_QPS'] = env['CM_MLPERF_LOADGEN_OFFLINE_TARGET_QPS'] + elif scenario == "Server": + if env.get('CM_MLPERF_LOADGEN_SERVER_TARGET_QPS'): + env['CM_MLPERF_LOADGEN_TARGET_QPS'] = env['CM_MLPERF_LOADGEN_SERVER_TARGET_QPS'] + elif scenario == "SingleStream": + if env.get('CM_MLPERF_LOADGEN_SINGLESTREAM_TARGET_LATENCY'): + env['CM_MLPERF_LOADGEN_TARGET_LATENCY'] = env['CM_MLPERF_LOADGEN_SINGLESTREAM_TARGET_LATENCY'] + elif scenario == "MultiStream": + if env.get('CM_MLPERF_LOADGEN_MULTISTREAM_TARGET_LATENCY'): + env['CM_MLPERF_LOADGEN_TARGET_LATENCY'] = env['CM_MLPERF_LOADGEN_MULTISTREAM_TARGET_LATENCY'] + + for mode in env['CM_MLPERF_LOADGEN_MODES']: + env_copy = copy.deepcopy(env) + env_copy['CM_MLPERF_LOADGEN_MODE'] = mode + for key in env_copy: + if isinstance(env_copy[key], str) and env_copy[key].startswith( + "CM_TMP_"): + del env_copy[key] + + print(f"\nRunning loadgen scenario: {scenario} and mode: {mode}") + ii = {'action': action, 'automation': 'script', 'tags': scenario_tags, 'quiet': 'true', + 'env': env_copy, 'input': inp, 'state': state, 'add_deps': copy.deepcopy(add_deps), 
'add_deps_recursive': + copy.deepcopy(add_deps_recursive), 'ad': ad, 'adr': copy.deepcopy(adr), 'v': verbose, 'print_env': print_env, 'print_deps': print_deps, 'dump_version_info': dump_version_info} + + if action == "docker": + for k in docker_extra_input: + ii[k] = docker_extra_input[k] + r = cm.access(ii) + if r['return'] > 0: + return r + if action == "docker": + # We run commands interactively inside the docker container + return {'return': 0} + + if env_copy.get('CM_OUTPUT_PREDICTIONS_PATH'): + print( + f"\nOutput predictions can be seen by opening the images inside {env_copy['CM_OUTPUT_PREDICTIONS_PATH']}\n") + + if state.get('docker', {}): + del (state['docker']) + + if env.get("CM_MLPERF_LOADGEN_COMPLIANCE", "") == "yes": + for test in test_list: + env_copy = copy.deepcopy(env) + for key in env_copy: + if isinstance(env_copy[key], str) and env_copy[key].startswith( + "CM_TMP_"): + del env_copy[key] + env_copy['CM_MLPERF_LOADGEN_COMPLIANCE_TEST'] = test + env_copy['CM_MLPERF_LOADGEN_MODE'] = "compliance" + ii = {'action': action, 'automation': 'script', 'tags': scenario_tags, 'quiet': 'true', + 'env': env_copy, 'input': inp, 'state': state, 'add_deps': copy.deepcopy(add_deps), 'add_deps_recursive': + copy.deepcopy(add_deps_recursive), 'adr': copy.deepcopy(adr), 'ad': ad, 'v': verbose, 'print_env': print_env, 'print_deps': print_deps, 'dump_version_info': dump_version_info} + if action == "docker": + for k in docker_extra_input: + ii[k] = docker_extra_input[k] + r = cm.access(ii) + if r['return'] > 0: + return r + if state.get('docker', {}): + del (state['docker']) + + if state.get("cm-mlperf-inference-results"): + # print(state["cm-mlperf-inference-results"]) + for sut in state["cm-mlperf-inference-results"]: # only one sut will be there + # Better to do this in a stand alone CM script with proper deps but + # currently we manage this by modifying the sys path of the python + # executing CM + import mlperf_utils # noqa + + print(sut) + result_table, headers = mlperf_utils.get_result_table( + state["cm-mlperf-inference-results"][sut]) + print(tabulate(result_table, headers=headers, tablefmt="pretty")) + + print( + f"\nThe MLPerf inference results are stored at {output_dir}\n") + + return {'return': 0} + + +def get_valid_scenarios(model, category, mlperf_version, mlperf_path): + + import sys + + submission_checker_dir = os.path.join(mlperf_path, "tools", "submission") + + sys.path.append(submission_checker_dir) + if not os.path.exists(os.path.join( + submission_checker_dir, "submission_checker.py")): + shutil.copy(os.path.join(submission_checker_dir, "submission-checker.py"), os.path.join(submission_checker_dir, + "submission_checker.py")) + + import submission_checker as checker + + if "dlrm-99" in model: + model = model.replace("dlrm-99", "dlrm-v2-99") + if "sdxl" in model: + model = "stable-diffusion-xl" + + config = checker.MODEL_CONFIG + + internal_model_name = config[mlperf_version]["model_mapping"].get( + model, model) + + valid_scenarios = config[mlperf_version]["required-scenarios-" + + category][internal_model_name] + + print( + "Valid Scenarios for " + + model + + " in " + + category + + " category are :" + + str(valid_scenarios)) + + return valid_scenarios + +########################################################################## + + +def postprocess(i): + + env = i['env'] + state = i['state'] + + if env.get('CM_MLPERF_IMPLEMENTATION', '') == 'reference': + x1 = env.get('CM_MLPERF_INFERENCE_SOURCE', '') + x2 = env.get('CM_MLPERF_INFERENCE_CONF_PATH', '') + + if x1 != 
'' and x2 != '': + print('') + print( + 'Path to the MLPerf inference benchmark reference sources: {}'.format(x1)) + print( + 'Path to the MLPerf inference reference configuration file: {}'.format(x2)) + print('') + + return {'return': 0} + + +########################################################################## + + +def load_md(path, path2, name): + + fn = os.path.join(path, path2, name + '.md') + + s = '' + + if os.path.isfile(fn): + r = utils.load_txt(fn) + if r['return'] > 0: + return r + + s = r['string'] + + return {'return': 0, 'string': s} + +########################################################################## + + +def get_url(url, path, path2, name, text): + + name_md = name + '.md' + fn = os.path.join(path, path2, name_md) + + urlx = '' + url_online = '' + if os.path.isfile(fn): + if not url.endswith('/'): + url += '/' + urlx = url + path2 + '/' + name_md + + url_online = '[{}]({})'.format(text, urlx) + + return {'return': 0, 'url_online': url_online} + +########################################################################## diff --git a/cmx4mlops/cmx4mlops/repo/script/run-mlperf-inference-app/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/run-mlperf-inference-app/_cm.yaml index cf390bc3a..29effb5c2 100644 --- a/cmx4mlops/cmx4mlops/repo/script/run-mlperf-inference-app/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/run-mlperf-inference-app/_cm.yaml @@ -8,9 +8,6 @@ category: Modular MLPerf inference benchmark pipeline developers: "[Arjun Suresh](https://www.linkedin.com/in/arjunsuresh), [Grigori Fursin](https://cKnowledge.org/gfursin)" -gui: - title: CM GUI to run MLPerf inference benchmarks and prepare submissions - clean_output_files: - open.tar.gz - summary.csv @@ -29,6 +26,7 @@ tags: - reference tags_help: "run-mlperf,inference" +predeps: False default_env: CM_MLPERF_IMPLEMENTATION: reference @@ -36,6 +34,8 @@ default_env: CM_MLPERF_RUN_STYLE: test CM_MLPERF_SKIP_SUBMISSION_GENERATION: no CM_DOCKER_PRIVILEGED_MODE: yes + CM_MLPERF_SUBMISSION_DIVISION: open + CM_MLPERF_INFERENCE_TP_SIZE: 1 input_mapping: api_server: CM_MLPERF_INFERENCE_API_SERVER @@ -110,6 +110,7 @@ input_mapping: sut: CM_MLPERF_INFERENCE_SUT_VARIATION nvidia_llama2_dataset_file_path: CM_NVIDIA_LLAMA_DATASET_FILE_PATH tp_size: CM_NVIDIA_TP_SIZE + vllm_tp_size: CM_MLPERF_INFERENCE_TP_SIZE vllm_model_name: CM_VLLM_SERVER_MODEL_NAME num_workers: CM_MLPERF_INFERENCE_NUM_WORKERS max_test_duration: CM_MLPERF_MAX_DURATION_TEST @@ -139,6 +140,12 @@ deps: - names: - inference-src tags: get,mlcommons,inference,src +- tags: pull,git,repo + env: + CM_GIT_CHECKOUT_PATH: '<<>>' + enable_if_env: + CM_MLPERF_INFERENCE_PULL_SRC_CHANGES: + - 'yes' - tags: get,sut,description skip_if_env: CM_MLPERF_USE_DOCKER: [ on ] @@ -201,11 +208,6 @@ variations: env: CM_MLPERF_LOADGEN_COMPLIANCE: 'yes' - dashboard: - default_gui: false - env: - CM_MLPERF_DASHBOARD: 'on' - find-performance: env: CM_MLPERF_FIND_PERFORMANCE_MODE: 'yes' @@ -334,7 +336,6 @@ variations: tags: _version.r4_0-dev r4.1-dev: - default: true env: CM_MLPERF_INFERENCE_VERSION: '4.1-dev' CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r4.1-dev_default @@ -351,6 +352,7 @@ variations: env: CM_MLPERF_INFERENCE_VERSION: '4.1' CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r4.1_default + CM_MLPERF_SUBMISSION_CHECKER_VERSION: v4.1 adr: get-mlperf-inference-results-dir: tags: _version.r4_1 @@ -359,6 +361,21 @@ variations: mlperf-inference-nvidia-scratch-space: tags: _version.r4_1 group: benchmark-version + + r5.0-dev: + default: true + env: + CM_MLPERF_INFERENCE_VERSION: '5.0-dev' + 
CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r5.0-dev_default + CM_MLPERF_SUBMISSION_CHECKER_VERSION: v5.0 + group: benchmark-version + adr: + get-mlperf-inference-results-dir: + tags: _version.r5.0-dev + get-mlperf-inference-submission-dir: + tags: _version.r5.0-dev + mlperf-inference-nvidia-scratch-space: + tags: _version.r5.0-dev short: add_deps_recursive: @@ -382,7 +399,6 @@ variations: submission: base: - all-modes - default_gui: true default_variations: submission-generation-style: full env: @@ -449,6 +465,7 @@ input_description: - mobilenet - efficientnet - rgat + - llama3_1-405b default: resnet50 desc: MLPerf model sort: 200 diff --git a/cmx4mlops/cmx4mlops/repo/script/run-mlperf-inference-app/customize.py b/cmx4mlops/cmx4mlops/repo/script/run-mlperf-inference-app/customize.py index e620df68b..d710d1f7c 100644 --- a/cmx4mlops/cmx4mlops/repo/script/run-mlperf-inference-app/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/run-mlperf-inference-app/customize.py @@ -244,7 +244,8 @@ def preprocess(i): inp = {} if str(docker_dt).lower() in ["yes", "true", "1"]: # turning it off for the first run and after that we turn it on - env['CM_DOCKER_REUSE_EXISTING_CONTAINER'] = 'no' + if env.get('CM_DOCKER_REUSE_EXISTING_CONTAINER', '') == '': + env['CM_DOCKER_REUSE_EXISTING_CONTAINER'] = 'no' env['CM_DOCKER_DETACHED_MODE'] = 'yes' if env.get('CM_DOCKER_IMAGE_NAME', '') != '': @@ -444,587 +445,3 @@ def get_url(url, path, path2, name, text): url_online = '[{}]({})'.format(text, urlx) return {'return': 0, 'url_online': url_online} - -########################################################################## - - -def gui(i): - - params = i['params'] - st = i['st'] - - script_meta = i['meta'] - - misc = i['misc_module'] - - script_path = i['script_path'] - script_url = i.get('script_url', '') - script_tags = i.get('script_tags', '') - - compute_meta = i.get('compute_meta', {}) - compute_tags = compute_meta.get('tags', []) - bench_meta = i.get('bench_meta', {}) - - compute_uid = compute_meta.get('uid', '') - bench_uid = bench_meta.get('uid', '') - - st_inputs_custom = {} - - bench_input = bench_meta.get('bench_input', {}) - - end_html = '' - - extra = {} - add_to_st_inputs = {} - - inp = script_meta['input_description'] - - # Here we can update params - v = compute_meta.get('mlperf_inference_device') - if v is not None and v != '': - inp['device']['force'] = v - - if v in ['tpu', 'gaudi']: - st.markdown('----') - st.markdown( - '**WARNING: unified CM workflow support for this hardware is pending - please [feel free to help](https://discord.gg/JjWNWXKxwT)!**') - return {'return': 0, 'skip': True, 'end_html': end_html} - - elif 'orin' in compute_tags: - st.markdown('----') - st.markdown( - '**WARNING: we need to encode CM knowledge from [this Orin setp](https://github.com/mlcommons/ck/blob/master/docs/mlperf/setup/setup-nvidia-jetson-orin.md) to this GUI!**') - return {'return': 0, 'skip': True, 'end_html': end_html} - - st.markdown('---') - st.markdown('**How would you like to run the MLPerf inference benchmark?**') - - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_device', - 'desc': inp['device']}) - device = r.get('value2') - inp['device']['force'] = device - - if device == 'cpu': - inp['implementation']['choices'] = ['mlcommons-python', - 'mlcommons-cpp', 'intel', 'ctuning-cpp-tflite'] - if 'intel' in compute_tags: - inp['implementation']['default'] = 'intel' - else: - inp['implementation']['default'] = 'mlcommons-python' - 
inp['backend']['choices'] = [ - 'onnxruntime', 'deepsparse', 'pytorch', 'tf', 'tvm-onnx'] - inp['backend']['default'] = 'onnxruntime' - elif device == 'rocm': - inp['implementation']['force'] = 'mlcommons-python' - inp['precision']['force'] = '' - inp['backend']['force'] = 'onnxruntime' - st.markdown( - '*WARNING: CM-MLPerf inference workflow was not tested thoroughly for AMD GPU - please feel free to test and improve!*') - elif device == 'qaic': - inp['implementation']['force'] = 'qualcomm' - inp['precision']['force'] = '' - inp['backend']['force'] = 'glow' - - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_division', - 'desc': inp['division']}) - division = r.get('value2') - inp['division']['force'] = division - - y = 'compliance' - if division == 'closed': - inp[y]['default'] = 'yes' - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_compliance', - 'desc': inp[y]}) - compliance = r.get('value2') - inp[y]['force'] = compliance - - if compliance == 'yes': - st.markdown( - '*:red[See [online table with required compliance tests](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#5132-inference)].*') - - else: - inp[y]['force'] = 'no' - - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_category', - 'desc': inp['category']}) - category = r.get('value2') - inp['category']['force'] = category - - ########################################################################## - # Implementation - v = bench_input.get('mlperf_inference_implementation') - if v is not None and v != '': - inp['implementation']['force'] = v - else: - if device == 'cuda': - inp['implementation']['choices'] = [ - 'nvidia', 'mlcommons-python', 'mlcommons-cpp'] - inp['implementation']['default'] = 'nvidia' - inp['backend']['choices'] = ['tensorrt', 'onnxruntime', 'pytorch'] - inp['backend']['default'] = 'tensorrt' - - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_implementation', - 'desc': inp['implementation']}) - implementation = r.get('value2') - inp['implementation']['force'] = implementation - - implementation_setup = '' - r = load_md(script_path, 'setup', 'i-' + implementation) - if r['return'] == 0: - implementation_setup = r['string'] - - url_faq_implementation = '' - r = get_url(script_url, script_path, 'faq', implementation, 'FAQ online') - if r['return'] == 0: - url_faq_implementation = r['url_online'] - - can_have_docker_flag = False - - if implementation == 'mlcommons-cpp': - # inp['backend']['choices'] = ['onnxruntime'] - inp['precision']['force'] = 'float32' - inp['backend']['force'] = 'onnxruntime' - inp['model']['choices'] = ['resnet50', 'retinanet'] - st.markdown( - '*:red[[CM automation recipe for this implementation](https://github.com/mlcommons/cm4mlops/tree/main/script/app-mlperf-inference-mlcommons-cpp)]*') - elif implementation == 'mlcommons-python': - inp['precision']['force'] = 'float32' - if device == 'cuda': - inp['backend']['choices'] = ['onnxruntime', 'pytorch', 'tf'] - inp['backend']['default'] = 'onnxruntime' - st.markdown( - '*:red[[CM automation recipe for this implementation](https://github.com/mlcommons/cm4mlops/tree/main/script/app-mlperf-inference-mlcommons-python)]*') - elif implementation == 'ctuning-cpp-tflite': - inp['precision']['force'] = 'float32' - inp['model']['force'] = 'resnet50' - st.markdown( - 
'*:red[[CM automation recipe for this implementation](https://github.com/mlcommons/cm4mlops/tree/main/script/app-mlperf-inference-ctuning-cpp-tflite)]*') - elif implementation == 'nvidia': - inp['backend']['force'] = 'tensorrt' - extra['skip_script_docker_func'] = True - can_have_docker_flag = True - st.markdown( - '*:red[[CM automation recipe for this implementation](https://github.com/mlcommons/cm4mlops/tree/main/script/app-mlperf-inference-nvidia)]*') - elif implementation == 'intel': - inp['model']['choices'] = ['bert-99', 'gptj-99'] - inp['model']['default'] = 'bert-99' - inp['precision']['choices'] = ['int8', 'int4'] - inp['precision']['default'] = 'int8' - inp['category']['force'] = 'datacenter' - inp['backend']['force'] = 'pytorch' - inp['sut']['default'] = 'sapphire-rapids.112c' - can_have_docker_flag = True - extra['skip_script_docker_func'] = True -# st.markdown('*:red[Note: Intel implementation require extra CM command to build and run Docker container - you will run CM commands to run MLPerf benchmarks there!]*') - st.markdown( - '*:red[[CM automation recipe for this implementation](https://github.com/mlcommons/cm4mlops/tree/main/script/reproduce-mlperf-inference-intel)]*') - elif implementation == 'qualcomm': - inp['model']['choices'] = ['resnet50', 'retinanet', 'bert-99'] - inp['model']['default'] = 'bert-99' - inp['precision']['default'] = 'float16' - extra['skip_script_docker_func'] = True - st.markdown( - '*:red[[CM automation recipe for this implementation](https://github.com/mlcommons/cm4mlops/tree/main/script/reproduce-mlperf-inference-qualcomm)]*') - - ########################################################################## - # Backend - - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_backend', - 'desc': inp['backend']}) - backend = r.get('value2') - inp['backend']['force'] = backend - - backend_setup = '' - r = load_md(script_path, 'setup', 'b-' + backend) - if r['return'] == 0: - backend_setup = r['string'] - - if backend == 'deepsparse': - inp['model']['choices'] = [ - 'resnet50', 'retinanet', 'bert-99', 'bert-99.9'] - inp['model']['default'] = 'bert-99' - inp['precision']['choices'] = ['float32', 'int8'] - inp['precision']['default'] = 'int8' - if 'force' in inp['precision']: - del (inp['precision']['force']) - - ########################################################################## - # Model - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_model', - 'desc': inp['model']}) - model = r.get('value2') - inp['model']['force'] = model - - github_doc_model = '' - - if model == 'retinanet': - x = '50' - if implementation == 'mlcommons-python': - x = '200' - st.markdown( - ':red[This model requires ~{}GB of free disk space for preprocessed dataset in a full/submission run!]\n'.format(x)) - - elif model.startswith('bert-'): - github_doc_model = 'bert' - - elif model.startswith('3d-unet-'): - github_doc_model = '3d-unet' - - elif model == 'rnnt': - github_doc_model = 'rnnt' - - elif model.startswith('dlrm-v2-'): - github_doc_model = 'dlrm_v2' - - elif model.startswith('gptj-'): - github_doc_model = 'gpt-j' - - elif model == 'sdxl': - github_doc_model = 'stable-diffusion-xl' - - elif model.startswith('llama2-'): - github_doc_model = 'llama2-70b' - - elif model.startswith('mixtral-'): - github_doc_model = 'mixtral-8x7b' - - if github_doc_model == '': - github_doc_model = model - - model_cm_url = 
'https://github.com/mlcommons/ck/tree/master/docs/mlperf/inference/{}'.format(
-        github_doc_model)
-    extra_notes_online = '[Extra notes online]({})\n'.format(model_cm_url)
-
-    st.markdown(
-        '*[CM-MLPerf GitHub docs for this model]({})*'.format(model_cm_url))
-
-    ##########################################################################
-    # Precision
-    if implementation == 'intel':
-        if model == 'bert-99':
-            inp['precision']['force'] = 'int8'
-        elif model == 'gptj-99':
-            inp['precision']['force'] = 'int4'
-    elif implementation == 'qualcomm':
-        if model == 'resnet50':
-            inp['precision']['print'] = 'int8'
-        elif model == 'retinanet':
-            inp['precision']['print'] = 'int8'
-        elif model == 'bert-99':
-            inp['precision']['print'] = 'int8/float16'
-
-    if inp['precision'].get('force', '') == '':
-        x = inp['precision'].get('print', '')
-        if x != '':
-            st.markdown('**{}**: {}'.format(inp['precision']['desc'], x))
-        else:
-            r = misc.make_selector({'st': st,
-                                    'st_inputs': st_inputs_custom,
-                                    'params': params,
-                                    'key': 'mlperf_inference_precision',
-                                    'desc': inp['precision']})
-            precision = r.get('value2')
-            inp['precision']['force'] = precision
-
-    ##########################################################################
-    # Benchmark version
-
-    script_meta_variations = script_meta['variations']
-
-    choices = [''] + [
-        k for k in script_meta_variations if script_meta_variations[k].get(
-            'group', '') == 'benchmark-version']
-    desc = {
-        'choices': choices,
-        'default': choices[0],
-        'desc': 'Force specific benchmark version?'}
-    r = misc.make_selector({'st': st,
-                            'st_inputs': st_inputs_custom,
-                            'params': params,
-                            'key': 'mlperf_inference_version',
-                            'desc': desc})
-    benchmark_version = r.get('value2')
-
-    if benchmark_version != '':
-        params['~~benchmark-version'] = [benchmark_version]
-
-    ##########################################################################
-    # Run via Docker container
-    if can_have_docker_flag:
-
-        default_choice = 'yes - run in container'
-
-        choices = [default_choice, 'no - run natively']
-        desc = {
-            'choices': choices,
-            'default': choices[0],
-            'desc': 'Should CM script prepare and run Docker container in interactive mode to run MLPerf? You can then copy/paste CM commands generated by this GUI to benchmark different models.'}
-        r = misc.make_selector({'st': st,
-                                'st_inputs': st_inputs_custom,
-                                'params': params,
-                                'key': 'mlperf_inference_docker',
-                                'desc': desc})
-        benchmark_docker = r.get('value2')
-
-        if benchmark_docker == 'yes - run in container':
-            add_to_st_inputs['@docker'] = True
-            add_to_st_inputs['@docker_cache'] = 'no'
-
-    ##########################################################################
-    # Prepare submission
-    st.markdown('---')
-
-    submission = st.toggle(
-        'Would you like to prepare official submission?',
-        value=False)
-    if submission:
-        r = misc.make_selector({'st': st,
-                                'st_inputs': st_inputs_custom,
-                                'params': params,
-                                'key': 'mlperf_inference_hw_name',
-                                'desc': inp['hw_name']})
-        inp['hw_name']['force'] = r.get('value2')
-
-        r = misc.make_selector({'st': st,
-                                'st_inputs': st_inputs_custom,
-                                'params': params,
-                                'key': 'mlperf_inference_submitter',
-                                'desc': inp['submitter']})
-        submitter = r.get('value2')
-        inp['submitter']['force'] = submitter
-
-        params['~~submission-generation'] = ['submission']
-        params['~all-scenarios'] = ['true']
-        inp['scenario']['force'] = ''
-        inp['clean']['default'] = False
-        inp['repro']['force'] = True
-
-        x = '*:red[Use the following command to find local directory with the submission tree and results:]*\n```bash\ncm find cache --tags=submission,dir\n```\n'
-
-        x += '*:red[You will also find results in `mlperf-inference-submission.tar.gz` file that you can submit to MLPerf!]*\n\n'
-
-        x += '*:red[Note that if some results are INVALID due to too short run, you can rerun the same CM command and it should increase the length of the benchmark until you get valid result!]*\n'
-
-        st.markdown(x)
-
-        st.markdown('---')
-
-    else:
-        inp['submitter']['force'] = ''
-        inp['clean']['default'] = True
-        params['~submission'] = ['false']
-
-        choices = [
-            'Performance',
-            'Accuracy',
-            'Find Performance from a short run',
-            'Performance and Accuracy']
-        desc = {
-            'choices': choices,
-            'default': choices[0],
-            'desc': 'What to measure?'}
-        r = misc.make_selector({'st': st,
-                                'st_inputs': st_inputs_custom,
-                                'params': params,
-                                'key': 'mlperf_inference_measure',
-                                'desc': desc})
-        measure = r.get('value2')
-
-        x = ''
-        if measure == 'Performance':
-            x = 'performance-only'
-        elif measure == 'Accuracy':
-            x = 'accuracy-only'
-        elif measure == 'Find Performance from a short run':
-            x = 'find-performance'
-        elif measure == 'Performance and Accuracy':
-            x = 'submission'
-
-        params['~~submission-generation'] = [x]
-
-    #######################################################################
-    # Prepare scenario
-
-    xall = 'All applicable'
-    choices = ['Offline', 'Server', 'SingleStream', 'MultiStream', xall]
-    desc = {
-        'choices': choices,
-        'default': choices[0],
-        'desc': 'Which scenario(s)?'}
-    r = misc.make_selector({'st': st,
-                            'st_inputs': st_inputs_custom,
-                            'params': params,
-                            'key': 'mlperf_inference_scenario',
-                            'desc': desc})
-    scenario = r.get('value2')
-
-    if scenario == xall:
-        params['~all-scenarios'] = ['true']
-        inp['scenario']['force'] = ''
-    else:
-        inp['scenario']['force'] = scenario
-
-    ##########################################################################
-    # Short or full run
-
-    x = ['Full run', 'Short run']
-    if submission:
-        choices = [x[0], x[1]]
-    else:
-        choices = [x[1], x[0]]
-
-    desc = {
-        'choices': choices,
-        'default': choices[0],
-        'desc': 'Short (test) or full (valid) run?'}
-    r = misc.make_selector({'st': st,
-                            'st_inputs': st_inputs_custom,
-                            'params': params,
-                            'key': 'mlperf_inference_how',
-                            'desc': desc})
-    how = r.get('value2')
-
-    if how == x[0]:
-        params['~~submission-generation-style'] = ['full']
-        inp['execution_mode']['force'] = 'valid'
-    else:
-        params['~~submission-generation-style'] = ['short']
-        inp['execution_mode']['force'] = 'test'
-
-    ##########################################################################
-    # Power
-
-# desc = {'boolean':True, 'default':False, 'desc':'Measure power?'}
-# r = misc.make_selector({'st':st, 'st_inputs':st_inputs_custom, 'params':params, 'key': 'mlperf_inference_power', 'desc':desc})
-# power = r.get('value2', False)
-
-    power = st.toggle('Measure power consumption?', value=False)
-
-    if power:
-        inp['power']['force'] = 'yes'
-
-        y = 'adr.mlperf-power-client.power_server'
-        r = misc.make_selector({'st': st,
-                                'st_inputs': st_inputs_custom,
-                                'params': params,
-                                'key': 'mlperf_inference_power_server',
-                                'desc': inp[y]})
-        inp[y]['force'] = r.get('value2')
-
-        y = 'adr.mlperf-power-client.port'
-        r = misc.make_selector({'st': st,
-                                'st_inputs': st_inputs_custom,
-                                'params': params,
-                                'key': 'mlperf_inference_power_port',
-                                'desc': inp[y]})
-        inp[y]['force'] = r.get('value2')
-
-        st.markdown(
-            '*:red[See [online notes](https://github.com/mlcommons/ck/blob/master/docs/tutorials/mlperf-inference-power-measurement.md)] to setup power meter and server.*')
-
-    else:
-        inp['power']['force'] = 'no'
-        inp['adr.mlperf-power-client.power_server']['force'] = ''
-        inp['adr.mlperf-power-client.port']['force'] = ''
-
-    ##########################################################################
-    # Dashboard
-
-# desc = {'boolean':True, 'default':False, 'desc':'Output results to W&B dashboard?'}
-# r = misc.make_selector({'st':st, 'st_inputs':st_inputs_custom, 'params':params, 'key': 'mlperf_inference_dashboard', 'desc':desc})
-# dashboard = r.get('value2', False)
-
-    dashboard = st.toggle('Output results to W&B dashboard?', value=False)
-
-    if dashboard:
-        params['~dashboard'] = ['true']
-
-        y = 'dashboard_wb_project'
-        r = misc.make_selector({'st': st,
-                                'st_inputs': st_inputs_custom,
-                                'params': params,
-                                'key': 'mlperf_inference_power_wb_project',
-                                'desc': inp[y]})
-        inp[y]['force'] = r.get('value2')
-
-        y = 'dashboard_wb_user'
-        r = misc.make_selector({'st': st,
-                                'st_inputs': st_inputs_custom,
-                                'params': params,
-                                'key': 'mlperf_inference_power_wb_user',
-                                'desc': inp[y]})
-        inp[y]['force'] = r.get('value2')
-
-    else:
-        params['~dashboard'] = ['false']
-        inp['dashboard_wb_project']['force'] = ''
-        inp['dashboard_wb_user']['force'] = ''
-
-    # Hide customization by default
-    params['hide_script_customization'] = True
-
-    x = implementation_setup
-    if backend_setup != '':
-        if x != '':
-            x += '\n\n'
-        x += backend_setup
-
-    extra['extra_notes_online'] = extra_notes_online
-    extra['extra_faq_online'] = url_faq_implementation
-    extra['extra_setup'] = x
-
-    ##########################################################################
-    value_reproduce = inp.get('repro', {}).get('force', False)
-    reproduce = st.toggle(
-        'Record extra info for reproducibility?',
-        value=value_reproduce)
-
-    explore = st.toggle(
-        'Explore/tune benchmark (batch size, threads, etc)?',
-        value=False)
-
-    if reproduce or explore:
-        add_to_st_inputs.update({
-            "@repro_extra.run-mlperf-inference-app.bench_uid": bench_uid,
-            "@repro_extra.run-mlperf-inference-app.compute_uid": compute_uid,
-            '@results_dir': '{{CM_EXPERIMENT_PATH3}}',
-            '@submission_dir': '{{CM_EXPERIMENT_PATH3}}'
-        })
-
-        inp['repro']['force'] = True
-        extra['use_experiment'] = True
-
-    if explore:
-        add_to_st_inputs['@batch_size'] = '{{CM_EXPLORE_BATCH_SIZE{[1,2,4,8]}}}'
-
-    ##########################################################################
-    debug = st.toggle(
-        'Debug and run MLPerf benchmark natively from command line after CM auto-generates CMD?',
-        value=False)
-    if debug:
-        inp['debug']['force'] = True
-
-    extra['add_to_st_inputs'] = add_to_st_inputs
-
-    return {'return': 0, 'end_html': end_html, 'extra': extra}
diff --git a/cmx4mlops/cmx4mlops/repo/script/run-mlperf-inference-submission-checker/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/run-mlperf-inference-submission-checker/_cm.yaml
index 84e712a40..0bb2079b0 100644
--- a/cmx4mlops/cmx4mlops/repo/script/run-mlperf-inference-submission-checker/_cm.yaml
+++ b/cmx4mlops/cmx4mlops/repo/script/run-mlperf-inference-submission-checker/_cm.yaml
@@ -36,6 +36,7 @@ deps:
   tags: preprocess,mlperf,inference,submission
 input_mapping:
   extra_args: CM_MLPERF_SUBMISSION_CHECKER_EXTRA_ARGS
+  extra_checker_args: CM_MLPERF_SUBMISSION_CHECKER_EXTRA_ARGS
   extra_model_benchmark_map: CM_MLPERF_EXTRA_MODEL_MAPPING
   input: CM_MLPERF_INFERENCE_SUBMISSION_DIR
   power: CM_MLPERF_POWER
@@ -50,6 +51,7 @@ input_mapping:
   src_version: CM_MLPERF_SUBMISSION_CHECKER_VERSION
   submission_dir: CM_MLPERF_INFERENCE_SUBMISSION_DIR
   submitter: CM_MLPERF_SUBMITTER
+  submitter_id: CM_MLPERF_SUBMITTER_ID
   tar: CM_TAR_SUBMISSION_DIR
 post_deps:
 - enable_if_env:
@@ -66,6 +68,12 @@ post_deps:
     CM_TAR_SUBMISSION_DIR:
     - 'yes'
   tags: run,tar
+- enable_if_env:
+    CM_SUBMITTER_ID:
+    - 'yes'
+  tags: submit,mlperf,results,_inference
+  env:
+    CM_MLPERF_SUBMISSION_FILE: <<>>
 tags:
 - run
 - mlc
@@ -103,3 +111,7 @@ versions:
     adr:
       submission-checker-src:
         version: r4.1
+  r5.0:
+    adr:
+      submission-checker-src:
+        version: master
diff --git a/cmx4mlops/cmx4mlops/repo/script/submit-mlperf-results/COPYRIGHT.md b/cmx4mlops/cmx4mlops/repo/script/submit-mlperf-results/COPYRIGHT.md
new file mode 100644
index 000000000..a059b0c49
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/repo/script/submit-mlperf-results/COPYRIGHT.md
@@ -0,0 +1,9 @@
+# Copyright Notice
+
+© 2024-2025 MLCommons. All Rights Reserved.
+
+This file is licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. A copy of the License can be obtained at:
+
+[Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is provided on an "AS IS" basis, without warranties or conditions of any kind, either express or implied. Please refer to the License for the specific language governing permissions and limitations under the License.
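For reference, a minimal sketch (not part of this patch) of how the two new checker inputs added above, `submitter_id` and the `extra_checker_args` alias, could be passed through the CM Python API, in the same style as the `cm.access` calls in the tutorial tests later in this patch. The checker's full tag list is truncated in the hunk context (only `run, mlc` is visible), so the tags below are an assumption, and the directory and ID values are placeholders. Note also that the new `post_deps` entry is gated on `CM_SUBMITTER_ID` being `'yes'`, while the input mapping sets `CM_MLPERF_SUBMITTER_ID`, so the automatic submit step does not fire from `--submitter_id` alone.

```python
# Hypothetical invocation of the updated submission checker via the CM Python
# API. Assumptions: the full tag list (truncated to "run, mlc" in the hunk
# context above) and placeholder values for the directory and submitter ID.
import cmind as cm

r = cm.access({'action': 'run',
               'automation': 'script',
               'tags': 'run,mlc,mlperf,inference,submission,checker',      # assumed full tag list
               'submission_dir': '/path/to/mlperf-inference-submission',   # -> CM_MLPERF_INFERENCE_SUBMISSION_DIR
               'submitter_id': '12345',   # -> CM_MLPERF_SUBMITTER_ID (new input)
               'extra_checker_args': '',  # -> CM_MLPERF_SUBMISSION_CHECKER_EXTRA_ARGS (new alias)
               'quiet': 'yes'})
if r['return'] > 0:
    print(r.get('error', 'unknown error'))
```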
diff --git a/cmx4mlops/cmx4mlops/repo/script/submit-mlperf-results/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/submit-mlperf-results/_cm.yaml
new file mode 100644
index 000000000..b7a10ce83
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/repo/script/submit-mlperf-results/_cm.yaml
@@ -0,0 +1,22 @@
+alias: submit-mlperf-results
+automation_alias: script
+automation_uid: 5b4e0237da074764
+category: MLPerf benchmark support
+default_env:
+  CM_MLPERF_SUBMISSION_URL: https://submissions-ui.mlcommons.org
+
+input_mapping:
+  input: CM_MLPERF_SUBMISSION_FILE
+  submitter_id: CM_MLPERF_SUBMITTER_ID
+tags:
+- submit
+- mlperf
+- results
+- mlperf-results
+- publish-results
+- submission
+uid: cc01f0a82bef4216
+variations:
+  inference:
+    env:
+      CM_MLPERF_BENCHMARK: "Inference"
diff --git a/cmx4mlops/cmx4mlops/repo/script/submit-mlperf-results/customize.py b/cmx4mlops/cmx4mlops/repo/script/submit-mlperf-results/customize.py
new file mode 100644
index 000000000..d39b233f3
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/repo/script/submit-mlperf-results/customize.py
@@ -0,0 +1,194 @@
+import requests
+from cmind import utils
+import cmind as cm
+import os
+import json
+
+
+def preprocess(i):
+
+    os_info = i['os_info']
+    env = i['env']
+    meta = i['meta']
+    automation = i['automation']
+
+    server = env['CM_MLPERF_SUBMISSION_URL']
+    benchmark = env['CM_MLPERF_BENCHMARK']
+    submitter_id = env['CM_MLPERF_SUBMITTER_ID']
+    file_path = env['CM_MLPERF_SUBMISSION_FILE']
+
+    r = get_signed_url(server, benchmark, submitter_id, file_path)
+    if r['return'] > 0:
+        return r
+
+    signed_url = r['signed_url']
+    submission_id = r['submission_id']
+
+    # print(signed_url)
+    # print(submission_id)
+    r = upload_file_to_signed_url(file_path, signed_url)
+    if r['return'] > 0:
+        return r
+
+    r = trigger_submission_checker(
+        server, submitter_id, benchmark, submission_id)
+    if r['return'] > 0:
+        return r
+
+    return {'return': 0}
+
+
+def get_signed_url(server, benchmark, submitter_id, file_path):
+    # Define the URL
+    url = f"{server}/index/url"
+
+    # Define the headers
+    headers = {
+        "Content-Type": "application/json"
+    }
+
+    # Define the payload
+    payload = {
+        "submitter_id": submitter_id,
+        "benchmark": benchmark,
+        "filename": file_path
+    }
+
+    try:
+        # Make the POST request
+        response = requests.post(url, json=payload, headers=headers)
+
+        # Check the response status
+        if response.status_code == 200:
+            # print("Request successful!")
+            # print("Response:", response.json())
+            pass
+        else:
+            # print(f"Request failed with status code {response.status_code}")
+            # print("Response:", response.text)
+            pass
+
+    except requests.exceptions.RequestException as e:
+        return {"return": 1,
+                "error": f"An error occurred in connecting to the server: {e}"}
+
+    response_json = response.json()
+    # print(response_json)
+    # response = json.loads(response_json)
+    try:
+        signed_url = response_json['signed_url']
+        submission_id = response_json['submission_id']
+    except Exception as e:
+        return {
+            "return": 1, "error": f"An error occurred while processing the response: {e}"}
+
+    return {'return': 0, 'signed_url': signed_url,
+            'submission_id': submission_id}
+
+
+def upload_file_to_signed_url(file_path, signed_url):
+    """
+    Uploads a file to a signed URL using HTTP PUT.
+
+    Parameters:
+        file_path (str): The path to the file you want to upload.
+        signed_url (str): The pre-signed URL for uploading the file.
+
+    Returns:
+        dict: A dictionary with 'status_code' and 'response' keys.
+    """
+    headers = {
+        'Content-Type': 'application/octet-stream',
+        'Access-Control-Allow-Headers': '*'
+    }
+
+    try:
+        # Open the file in binary mode
+        with open(file_path, 'rb') as file:
+            response = requests.put(
+                signed_url,
+                data=file,
+                headers=headers
+            )
+
+        if response.status_code in [200, 201, 204]:
+            print("File uploaded successfully!")
+            return {
+                'return': 0
+            }
+        else:
+            print(
+                f"Failed to upload file. Status code: {response.status_code}")
+            print("Response:", response.text)
+
+            return {
+                'return': response.status_code,
+                'error': response.text
+            }
+
+    except FileNotFoundError:
+        print("Error: File not found.")
+        return {
+            'return': 400,
+            'error': f'''File {file_path} not found'''
+        }
+
+    except requests.exceptions.RequestException as e:
+        print(f"Request failed: {e}")
+        return {
+            'return': 500,
+            'error': str(e)
+        }
+
+
+def trigger_submission_checker(
+        server_url, submitter_id, benchmark, submission_id):
+    """
+    Sends a POST request with URL-encoded form data.
+
+    Parameters:
+        server_url (str): The server endpoint URL (e.g., https://example.com/index).
+        submitter_id (str): The ID of the submitter.
+        benchmark (str): The benchmark identifier.
+        submission_id (str): The submission ID.
+
+    Returns:
+        dict: A dictionary containing status code and response content.
+    """
+    url = f"{server_url}/index"
+    headers = {
+        "Content-Type": "application/x-www-form-urlencoded"
+    }
+    payload = {
+        "submitter_id": submitter_id,
+        "benchmark": benchmark,
+        "submission_id": submission_id
+    }
+
+    try:
+        # Make the POST request with URL-encoded data
+        response = requests.post(url, data=payload, headers=headers)
+
+        if response.ok:
+            print("Submission Check Request successful!")
+            pass
+        else:
+            print(
+                f"Submission Check Request failed with status code: {response.status_code}")
+            print("Response:", response.text)
+
+        return {
+            "return": 0,
+            "response": response.text
+        }
+
+    except requests.exceptions.RequestException as e:
+        print("An error occurred:", e)
+        return {
+            "return": 500,
+            "error": str(e)
+        }
+
+
+def postprocess(i):
+    return {'return': 0}
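Taken together, the `_cm.yaml` and `customize.py` above implement a three-step flow: request a pre-signed upload URL from the server at `CM_MLPERF_SUBMISSION_URL`, PUT the submission tarball to that URL, and trigger the server-side submission checker. A minimal usage sketch (not part of this patch) via the CM Python API; the tags and the `input`/`submitter_id` keys come straight from the `_cm.yaml` input mapping, while the file name and submitter ID are placeholders:

```python
# Hypothetical end-to-end call of the new submit-mlperf-results script.
import cmind as cm

r = cm.access({'action': 'run',
               'automation': 'script',
               'tags': 'submit,mlperf,results,_inference',     # _inference variation sets CM_MLPERF_BENCHMARK=Inference
               'input': 'mlperf-inference-submission.tar.gz',  # -> CM_MLPERF_SUBMISSION_FILE (placeholder)
               'submitter_id': '12345',                        # -> CM_MLPERF_SUBMITTER_ID (placeholder)
               'quiet': 'yes'})
if r['return'] > 0:
    print(r.get('error', 'unknown error'))
```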
diff --git a/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/script/process_tests.py b/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/script/process_tests.py
new file mode 100644
index 000000000..8012d097b
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/script/process_tests.py
@@ -0,0 +1,38 @@
+import sys
+import os
+import cmind as cm
+import check as checks
+import json
+import yaml
+
+files = sys.argv[1:]
+
+for file in files:
+    print(file)
+    if not os.path.isfile(file) or not "script" in file:
+        continue
+    if not file.endswith("_cm.json") and not file.endswith("_cm.yaml"):
+        continue
+    script_path = os.path.dirname(file)
+    f = open(file)
+    if file.endswith(".json"):
+        data = json.load(f)
+    elif file.endswith(".yaml"):
+        data = yaml.safe_load(f)
+    if data.get('uid', '') == '':
+        continue  # not a CM script meta
+    uid = data['uid']
+
+    ii = {
+        'action': 'test', 'automation': 'script', 'artifact': uid, 'quiet': 'yes', 'out': 'con'
+    }
+    if os.environ.get('DOCKER_CM_REPO', '') != '':
+        ii['docker_cm_repo'] = os.environ['DOCKER_CM_REPO']
+    if os.environ.get('DOCKER_CM_REPO_BRANCH', '') != '':
+        ii['docker_cm_repo_branch'] = os.environ['DOCKER_CM_REPO_BRANCH']
+    if os.environ.get('TEST_INPUT_INDEX', '') != '':
+        ii['test_input_index'] = os.environ['TEST_INPUT_INDEX']
+    print(ii)
+    r = cm.access(ii)
+
+    checks.check_return(r)
diff --git a/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/script/test_docker.py b/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/script/test_docker.py
index ad867a2a1..1b63631c6 100644
--- a/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/script/test_docker.py
+++ b/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/script/test_docker.py
@@ -10,7 +10,7 @@
                 'add_deps_recursive': {
                     'compiler': {'tags': "gcc"}
                 },
-                'docker_cm_repo': 'mlcommons@cm4mlops',
+                'docker_cm_repo': 'mlcommons@mlperf-automations',
                 'image_name': 'cm-script-app-image-classification-onnx-py',
                 'env': {
                     'CM_DOCKER_RUN_SCRIPT_TAGS': 'app,image-classification,onnx,python',
@@ -27,7 +27,7 @@
                 'add_deps_recursive': {
                     'compiler': {'tags': "gcc"}
                 },
-                'docker_cm_repo': 'mlcommons@cm4mlops',
+                'docker_cm_repo': 'mlcommons@mlperf-automations',
                 'image_name': 'cm-script-app-image-classification-onnx-py',
                 'env': {
                     'CM_DOCKER_RUN_SCRIPT_TAGS': 'app,image-classification,onnx,python',
diff --git a/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/tutorials/test_tutorial_retinanet.py b/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/tutorials/test_tutorial_retinanet.py
index 0b96f17f5..bc8d22f78 100644
--- a/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/tutorials/test_tutorial_retinanet.py
+++ b/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/tutorials/test_tutorial_retinanet.py
@@ -30,7 +30,7 @@
                'name': 'mlperf'})
 checks.check_return(r)
 
-r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short,_dashboard', 'adr':
+r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short', 'adr':
                {'python': {'name': 'mlperf', 'version_min': '3.8'}, 'compiler': {'tags': "gcc"}, 'openimages-preprocessed': {'tags': '_50'}}, 'submitter': 'Community',
                'implementation': 'cpp', 'hw_name': 'default', 'model': 'retinanet', 'backend': 'onnxruntime', 'device': 'cpu', 'scenario': 'Offline',
                'test_query_count': '10', 'clean': 'true', 'quiet': 'yes'})
diff --git a/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_ge.py b/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_ge.py
index 692ddeb83..4e17d572d 100644
--- a/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_ge.py
+++ b/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_ge.py
@@ -18,7 +18,7 @@
                'device': 'cpu', 'scenario': 'Offline',
                'mode': 'accuracy', 'test_query_count': '5', 'clean': 'true', 'quiet': 'yes'})
 checks.check_return(r)
-r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short,_dashboard', 'adr':
+r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short', 'adr':
                {'python': {'name': 'mlperf', 'version_min': '3.8'}, 'tvm': {
                    'tags': '_pip-install'}, 'tvm-model': {'tags': '_graph_executor'}},
                'submitter': 'Community', 'implementation': 'python', 'hw_name': 'default', 'model': 'resnet50', 'backend': 'tvm-onnx',
diff --git a/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_vm.py b/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_vm.py
index 5758ad08f..28bc0132b 100644
--- a/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_vm.py
+++ b/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_vm.py
@@ -20,7 +20,7 @@
                'mode': 'accuracy', 'test_query_count': '5', 'clean': 'true', 'quiet': 'yes'})
 checks.check_return(r)
-r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short,_dashboard', 'adr':
+r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short', 'adr':
                {'python': {'name': 'mlperf', 'version_min': '3.8'}, 'tvm': {'tags': '_pip-install'}}, 'submitter': 'Community',
                'implementation': 'python', 'hw_name': 'default', 'model': 'resnet50', 'backend': 'tvm-onnx',
                'device': 'cpu', 'scenario': 'Offline', 'test_query_count': '500', 'clean': 'true', 'quiet': 'yes'})
diff --git a/cmx4mlops/pyproject.toml b/cmx4mlops/pyproject.toml
new file mode 100644
index 000000000..678db8762
--- /dev/null
+++ b/cmx4mlops/pyproject.toml
@@ -0,0 +1,80 @@
+# Author and developer: Grigori Fursin
+
+[build-system]
+build-backend = "setuptools.build_meta"
+requires = ["setuptools"]
+
+[project]
+name = "cmx4mlops"
+
+authors = [
+  {name = "Grigori Fursin and contributors", email = "grigori.fursin@ctuning.org"}
+]
+
+maintainers = [
+  {name = "Grigori Fursin", email = "grigori.fursin@ctuning.org"}
+]
+
+description = "CMX4MLOps repository"
+
+requires-python = ">=3.7"
+
+dependencies = [
+  "cmind>=3.5.2",
+  "flextask"
+]
+
+keywords = [
+  "cmx4mlops",
+  "cmx4mlperf",
+  "cm4mlops",
+  "cm4mlperf",
+  "mlperf",
+  "virtual mlops",
+  "vmlops",
+  "cmind",
+  "workflow",
+  "automation",
+  "mlops",
+  "devops",
+  "aiops",
+  "portability",
+  "reusability"
+]
+
+license = {text = "Apache 2.0"}
+
+classifiers = [
+  "License :: OSI Approved :: Apache Software License",
+  "Operating System :: OS Independent",
+  "Programming Language :: Python :: 3.8",
+  "Programming Language :: Python :: 3.9",
+  "Programming Language :: Python :: 3.10",
+  "Programming Language :: Python :: 3.11",
+  "Programming Language :: Python :: 3.12",
+]
+
+dynamic = ["readme", "version"]
+
+[tool.setuptools.dynamic]
+readme = {file = "cmx4mlops/README.md", content-type = "text/markdown"}
+version = {file = "cmx4mlops/VERSION"}
+
+[tool.setuptools]
+zip-safe = false
+include-package-data = true
+
+[tool.setuptools.package-data]
+cmx4mlops = ["**"]
+
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["cmx4mlops"]
+
+
+[project.urls]
+Homepage = "https://github.com/mlcommons/ck"
+Documentation = "TBD"
+Repository = "https://github.com/mlcommons/ck/tree/master/cmx4mlops"
+Issues = "https://github.com/mlcommons/ck/issues"
+Changelog = "https://github.com/mlcommons/ck/blob/master/cmx4mlops/CHANGES.md"
diff --git a/docs/README.md b/docs/README.md
index a5fbcc2ac..c3fc2d31f 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,6 +1,6 @@
 # CM documentation
 
-**We plan to rewrite and simplify the CM documentation and tutorials based on user feedback in Q3 2024 - please stay tuned for more details**.
+**We plan to rewrite and simplify the CM documentation and tutorials based on user feedback - please stay tuned for more details**.
 
 Collective Mind (CM) is a lightweight, non-intrusive and technology-agnostic workflow automation framework being developed by the [MLCommons Task Force on Automation and Reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md)
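The new `pyproject.toml` above publishes the repository as a `cmx4mlops` package, with `readme` and `version` resolved dynamically from `cmx4mlops/README.md` and `cmx4mlops/VERSION` at build time. A small sketch (not part of this patch, assuming the package has been installed, e.g. via pip) that inspects the resulting metadata with the standard library:

```python
# Hypothetical smoke check of the installed cmx4mlops package metadata
# declared in pyproject.toml above.
import importlib.metadata as md

dist = md.distribution("cmx4mlops")
print(dist.metadata["Name"])             # cmx4mlops
print(dist.version)                      # resolved dynamically from cmx4mlops/VERSION
print(dist.metadata["Requires-Python"])  # >=3.7, as declared
print(dist.requires)                     # should list cmind>=3.5.2 and flextask
```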