diff --git a/README.md b/README.md
index 9e78da9a476..c1476bfef17 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@ state-of-the-art optimizations to perform inference efficiently on NVIDIA GPUs.
 [![python](https://img.shields.io/badge/python-3.10-green)](https://www.python.org/downloads/release/python-31012/)
 [![cuda](https://img.shields.io/badge/cuda-13.0.0-green)](https://developer.nvidia.com/cuda-downloads)
 [![torch](https://img.shields.io/badge/torch-2.9.0-green)](https://pytorch.org)
-[![version](https://img.shields.io/badge/release-1.2.0rc8-green)](https://github.com/NVIDIA/TensorRT-LLM/blob/main/tensorrt_llm/version.py)
+[![version](https://img.shields.io/badge/release-1.2.0rc7-green)](https://github.com/NVIDIA/TensorRT-LLM/blob/main/tensorrt_llm/version.py)
 [![license](https://img.shields.io/badge/license-Apache%202-blue)](https://github.com/NVIDIA/TensorRT-LLM/blob/main/LICENSE)
 
 [Architecture](https://nvidia.github.io/TensorRT-LLM/developer-guide/overview.html)   |   [Performance](https://nvidia.github.io/TensorRT-LLM/developer-guide/perf-overview.html)   |   [Examples](https://nvidia.github.io/TensorRT-LLM/quick-start-guide.html)   |   [Documentation](https://nvidia.github.io/TensorRT-LLM/)   |   [Roadmap](https://github.com/NVIDIA/TensorRT-LLM/issues?q=is%3Aissue%20state%3Aopen%20label%3Aroadmap)
diff --git a/docs/source/features/feature-combination-matrix.md b/docs/source/features/feature-combination-matrix.md
index d84143c4c53..8745a20064e 100644
--- a/docs/source/features/feature-combination-matrix.md
+++ b/docs/source/features/feature-combination-matrix.md
@@ -1,23 +1,19 @@
 # Feature Combination Matrix
 
-| Feature | Overlap Scheduler | CUDA Graph | Tensor Parallelism | Pipeline Parallelism | Expert Parallelism | Helix Parallelism | Attention Data Parallelism | Disaggregated Serving | Chunked Prefill | MTP | EAGLE-3(One Model Engine) | EAGLE-3(Two Model Engine) | Torch Sampler | TLLM C++ Sampler | KV Cache Reuse | Slide Window Attention | Logits Post Processor | Guided Decoding | LoRA |
-| -------------------------- | ----------------- | ---------- | ------------------ | -------------------- | ------------------ | ----------------- | -------------------------- | --------------------- | --------------- | -------- | ------------------------- | ------------------------- | ------------- | ---------------- | -------------- | ---------------------- | --------------------- | --------------- | -------- |
-| Overlap Scheduler | --- | | | | | | | | | | | | | | | | | | |
-| CUDA Graph | Yes | --- | | | | | | | | | | | | | | | | | |
-| Tensor Parallelism | Yes | Yes | --- | | | | | | | | | | | | | | | | |
-| Pipeline Parallelism | Yes | Yes | Yes | --- | | | | | | | | | | | | | | | |
-| Expert Parallelism | Yes | Yes | Yes | Yes | --- | | | | | | | | | | | | | | |
-| Helix Parallelism | Untested | Yes | Yes | Yes | Yes | --- | | | | | | | | | | | | | |
-| Attention Data Parallelism | Yes | Yes | Yes | Yes | Yes | Known issues | --- | | | | | | | | | | | | |
-| Disaggregated Serving | Yes | Yes | Yes | Yes | Yes | Yes | Yes | --- | | | | | | | | | | | |
-| Chunked Prefill | Yes | Yes | Yes | Untested | Yes | Yes | Yes | Yes | --- | | | | | | | | | | |
-| MTP | Yes | Yes | Yes | No | Yes | No | Yes | Yes | Yes | --- | | | | | | | | | |
-| EAGLE-3(One Model Engine) | Yes | Yes | Yes | No | Yes | No | Yes | Yes | Yes | No | --- | | | | | | | | |
-| EAGLE-3(Two Model Engine) | Yes | Yes | Yes | No | Yes | No | Yes | Yes | Yes | No | No | --- | | | | | | | |
-| Torch Sampler | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | --- | | | | | | |
-| TLLM C++ Sampler | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | No | No | No | No | --- | | | | | |
-| KV Cache Reuse | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | --- | | | | |
-| Slide Window Attention | Yes | Yes | Yes | Yes | Yes | Untested | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | --- | | | |
-| Logits Post Processor | Yes | Yes | Yes | Yes | Yes | Yes | Yes | No | Yes | No | No | No | Yes | Yes | Yes | Yes | --- | | |
-| Guided Decoding | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | --- | |
-| LoRA | Yes | No | Yes | Yes | Untested | Untested | Untested | Untested | Yes | Untested | Untested | Untested | Yes | Yes | Yes | Yes | Yes | Untested | --- |
+| Feature | Overlap Scheduler | CUDA Graph | Attention Data Parallelism | Disaggregated Serving | Chunked Prefill | MTP | EAGLE-3(One Model Engine) | EAGLE-3(Two Model Engine) | Torch Sampler | TLLM C++ Sampler | KV Cache Reuse | Slide Window Attention | Logits Post Processor | Guided Decoding | LoRA |
+| -------------------------- | ----------------- | ---------- | -------------------------- | --------------------- | --------------- | -------- | ------------------------- | ------------------------- | ------------- | ---------------- | -------------- | ---------------------- | --------------------- | --------------- | ---- |
+| Overlap Scheduler | --- | | | | | | | | | | | | | | |
+| CUDA Graph | Yes | --- | | | | | | | | | | | | | |
+| Attention Data Parallelism | Yes | Yes | --- | | | | | | | | | | | | |
+| Disaggregated Serving | Yes | Yes | Yes | --- | | | | | | | | | | | |
+| Chunked Prefill | Yes | Yes | Yes | Yes | --- | | | | | | | | | | |
+| MTP | Yes | Yes | Yes | Yes | Yes | --- | | | | | | | | | |
+| EAGLE-3(One Model Engine) | Yes | Yes | Yes | Yes | Yes | No | --- | | | | | | | | |
+| EAGLE-3(Two Model Engine) | Yes | Yes | Yes | Yes | Yes | No | No | --- | | | | | | | |
+| Torch Sampler | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | --- | | | | | | |
+| TLLM C++ Sampler | Yes | Yes | Yes | Yes | Yes | No | No | No | No | --- | | | | | |
+| KV Cache Reuse | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | --- | | | | |
+| Slide Window Attention | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | --- | | | |
+| Logits Post Processor | Yes | Yes | Yes | No | Yes | No | No | No | Yes | Yes | Yes | Yes | --- | | |
+| Guided Decoding | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | --- | |
+| LoRA | Yes | No | Untested | Untested | Untested | Untested | Untested | Untested | Yes | Yes | Yes | Yes | Yes | Untested | --- |
diff --git a/docs/source/models/supported-models.md b/docs/source/models/supported-models.md
index 332304f5d52..2185e384733 100644
--- a/docs/source/models/supported-models.md
+++ b/docs/source/models/supported-models.md
@@ -40,11 +40,10 @@ Note: Support for other models may vary. Features marked "N/A" are not applicable
 | `Qwen3MoeForCausalLM` | Yes | Yes | Yes | Yes | Yes | No | Yes | Yes | Yes | Yes | Yes | N/A | Yes | Yes |
 | `Qwen3NextForCausalLM` | Yes | Yes | No | Untested | Yes | No | No | No | Yes | Yes | No | No | Untested | Untested |
 | `Llama4ForConditionalGeneration` | Yes | Yes | Yes | Yes | Yes | No | Yes | Yes | Yes | Yes | Untested | N/A | Yes | Yes |
-| `GptOssForCausalLM` | Yes | Yes | Yes | Yes | Yes | No | Yes | Yes [^3] | Yes | Yes | Yes | N/A | Yes | Yes |
+| `GptOssForCausalLM` | Yes | Yes | Yes | Yes | No | No | Yes | No | Yes | Yes | No | N/A | Yes | Yes |
 
 [^1]: Chunked Prefill for MLA can only be enabled on SM100/SM103.
 [^2]: KV cache reuse for MLA can only be enabled on SM90/SM100/SM103 and in BF16/FP8 KV cache dtype.
-[^3]: Overlap scheduler isn't supported when using EAGLE-3(Two Model Engine) for GPT-OSS.
 
 # Multimodal Feature Support Matrix (PyTorch Backend)
 
diff --git a/examples/constraints.txt b/examples/constraints.txt
index 02571fa1b68..26378475000 100644
--- a/examples/constraints.txt
+++ b/examples/constraints.txt
@@ -1,3 +1,3 @@
-tensorrt_llm==1.2.0rc8
+tensorrt_llm==1.2.0rc7
 evaluate~=0.4.1
 rouge_score~=0.1.2
diff --git a/examples/models/core/mistral_large_3/README.md b/examples/models/core/mistral_large_3/README.md
index da219bf7b0a..5ea730c9f10 100644
--- a/examples/models/core/mistral_large_3/README.md
+++ b/examples/models/core/mistral_large_3/README.md
@@ -19,8 +19,7 @@ mpirun -n 1 --allow-run-as-root --oversubscribe python3 examples/llm-api/quickst
     --max_tokens 100 \
     --checkpoint_format mistral \
     --model_type mistral_large_3 \
-    --moe_backend TRTLLM \
-    --image_format pil
+    --moe_backend TRTLLM
 ```
 
 ## LLM-only run
diff --git a/jenkins/L0_Test.groovy b/jenkins/L0_Test.groovy
index a6fef33a635..f3a8226167b 100644
--- a/jenkins/L0_Test.groovy
+++ b/jenkins/L0_Test.groovy
@@ -808,7 +808,7 @@ def getPytestBaseCommandLine(
     portEnvVars,
     pytestUtil,
     "pytest",
-    "-vv",
+    "-v",
     testFilter[(DETAILED_LOG)] ? "-s" : "",
     "--timeout-method=thread",
     "--apply-test-list-correction",
diff --git a/security_scanning/docs/poetry.lock b/security_scanning/docs/poetry.lock
index f012c4737ff..86ab05727be 100644
--- a/security_scanning/docs/poetry.lock
+++ b/security_scanning/docs/poetry.lock
@@ -1195,13 +1195,13 @@ typing-extensions = ">=4.12.0"
 
 [[package]]
 name = "urllib3"
-version = "2.6.3"
+version = "2.6.2"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
-    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
+    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
+    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/auto_deploy/poetry.lock b/security_scanning/examples/auto_deploy/poetry.lock
index 607bf79e0c3..e4e3db4c5d8 100644
--- a/security_scanning/examples/auto_deploy/poetry.lock
+++ b/security_scanning/examples/auto_deploy/poetry.lock
@@ -3635,13 +3635,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.3"
+version = "2.6.2"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/draft_target_model/poetry.lock b/security_scanning/examples/draft_target_model/poetry.lock index 9175f8b9ec3..fa46e1e5db5 100644 --- a/security_scanning/examples/draft_target_model/poetry.lock +++ b/security_scanning/examples/draft_target_model/poetry.lock @@ -1831,13 +1831,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/eagle/poetry.lock b/security_scanning/examples/eagle/poetry.lock index 51badcabe38..1a7b36af59c 100644 --- a/security_scanning/examples/eagle/poetry.lock +++ b/security_scanning/examples/eagle/poetry.lock @@ -1807,13 +1807,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/llm-eval/lm-eval-harness/poetry.lock b/security_scanning/examples/llm-eval/lm-eval-harness/poetry.lock index bd679a12372..a1479215601 100644 --- a/security_scanning/examples/llm-eval/lm-eval-harness/poetry.lock +++ b/security_scanning/examples/llm-eval/lm-eval-harness/poetry.lock @@ -3273,13 +3273,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/lookahead/poetry.lock b/security_scanning/examples/lookahead/poetry.lock index 9175f8b9ec3..fa46e1e5db5 100644 --- a/security_scanning/examples/lookahead/poetry.lock +++ b/security_scanning/examples/lookahead/poetry.lock @@ -1831,13 +1831,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/medusa/poetry.lock b/security_scanning/examples/medusa/poetry.lock index 9175f8b9ec3..fa46e1e5db5 100644 --- a/security_scanning/examples/medusa/poetry.lock +++ b/security_scanning/examples/medusa/poetry.lock @@ -1831,13 +1831,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/contrib/baichuan/poetry.lock b/security_scanning/examples/models/contrib/baichuan/poetry.lock index 533aa08614f..2977c6aab1d 100644 --- a/security_scanning/examples/models/contrib/baichuan/poetry.lock +++ b/security_scanning/examples/models/contrib/baichuan/poetry.lock @@ -1888,13 +1888,13 @@ telegram = ["requests"] [[package]] name = "transformers" -version = "5.0.0rc2" +version = "5.0.0rc1" description = "Transformers: the model-definition framework for state-of-the-art machine learning models in text, vision, audio, and multimodal models, for both inference and training." 
optional = false python-versions = ">=3.10.0" files = [ - {file = "transformers-5.0.0rc2-py3-none-any.whl", hash = "sha256:f8f2a14060ab11f20a0eec39d827af54c1589c327c5799d82808ae3f4167418a"}, - {file = "transformers-5.0.0rc2.tar.gz", hash = "sha256:9f2fa5e132433dd7eb910dc224b32de0baf758f3b6ffc918dbb632e0af85c07a"}, + {file = "transformers-5.0.0rc1-py3-none-any.whl", hash = "sha256:8b9604700769872cab4280dbcde201f557e93f72ee5a85c4592275ab4f15d330"}, + {file = "transformers-5.0.0rc1.tar.gz", hash = "sha256:1fdde557b96ef8ea277c45b8e0d558f1e167fe28a98593f4c4aec0277e335821"}, ] [package.dependencies] @@ -1912,15 +1912,15 @@ typer-slim = "*" [package.extras] accelerate = ["accelerate (>=1.1.0)"] -all = ["Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "av", "codecarbon (>=2.8.1)", "jinja2 (>=3.1.0)", "jmespath (>=1.0.1)", "kenlm", "kernels (>=0.10.2,<0.11)", "librosa", "mistral-common[opencv] (>=1.6.3)", "num2words", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "timm (>=1.0.23)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torchaudio", "torchvision"] +all = ["Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "av", "codecarbon (>=2.8.1)", "jinja2 (>=3.1.0)", "jmespath (>=1.0.1)", "kenlm", "kernels (>=0.10.2,<0.11)", "librosa", "mistral-common[opencv] (>=1.6.3)", "num2words", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torchaudio", "torchvision"] audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] benchmark = ["optimum-benchmark (>=0.3.0)"] chat-template = ["jinja2 (>=3.1.0)", "jmespath (>=1.0.1)"] codecarbon = ["codecarbon (>=2.8.1)"] deepspeed = ["accelerate (>=1.1.0)", "deepspeed (>=0.9.3)"] deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "libcst", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "openai (>=1.98.0)", "optuna", "parameterized (>=0.9)", "protobuf", "psutil", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "tensorboard", "timeout-decorator", "torch (>=2.2)", "uvicorn"] -dev = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "av", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "jinja2 (>=3.1.0)", "jmespath (>=1.0.1)", "kenlm", "kernels (>=0.10.2,<0.11)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "openai (>=1.98.0)", 
"optuna", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (>=1.0.23)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torch (>=2.2)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)", "uvicorn"] -dev-torch = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "kenlm", "kernels (>=0.10.2,<0.11)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "openai (>=1.98.0)", "optuna", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (>=1.0.23)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torch (>=2.2)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)", "uvicorn"] +dev = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "av", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "jinja2 (>=3.1.0)", "jmespath (>=1.0.1)", "kenlm", "kernels (>=0.10.2,<0.11)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "openai (>=1.98.0)", "optuna", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "ruff (==0.13.1)", "sacrebleu 
(>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torch (>=2.2)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)", "uvicorn"] +dev-torch = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "kenlm", "kernels (>=0.10.2,<0.11)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "openai (>=1.98.0)", "optuna", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torch (>=2.2)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)", "uvicorn"] ftfy = ["ftfy"] hf-xet = ["hf_xet"] hub-kernels = ["kernels (>=0.10.2,<0.11)"] @@ -1943,7 +1943,7 @@ sklearn = ["scikit-learn"] speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] testing = ["GitPython (<3.1.19)", "accelerate (>=1.1.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "libcst", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "openai (>=1.98.0)", "parameterized (>=0.9)", "psutil", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "tensorboard", "timeout-decorator", "torch (>=2.2)", "uvicorn"] tiktoken = ["blobfile", "tiktoken"] -timm = ["timm (>=1.0.23)"] +timm = ["timm (!=1.0.18,<=1.0.19)"] tokenizers = ["tokenizers (>=0.22.0,<=0.23.0)"] torch = ["accelerate (>=1.1.0)", "torch (>=2.2)"] torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] @@ -2007,13 +2007,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/contrib/bloom/poetry.lock b/security_scanning/examples/models/contrib/bloom/poetry.lock index 9175f8b9ec3..fa46e1e5db5 100644 --- a/security_scanning/examples/models/contrib/bloom/poetry.lock +++ b/security_scanning/examples/models/contrib/bloom/poetry.lock @@ -1831,13 +1831,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/contrib/chatglm-6b/poetry.lock b/security_scanning/examples/models/contrib/chatglm-6b/poetry.lock index 4c278b6e731..4bcc69beb5d 100644 --- a/security_scanning/examples/models/contrib/chatglm-6b/poetry.lock +++ b/security_scanning/examples/models/contrib/chatglm-6b/poetry.lock @@ -1923,13 +1923,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/contrib/chatglm2-6b/poetry.lock b/security_scanning/examples/models/contrib/chatglm2-6b/poetry.lock index 4c278b6e731..4bcc69beb5d 100644 --- a/security_scanning/examples/models/contrib/chatglm2-6b/poetry.lock +++ b/security_scanning/examples/models/contrib/chatglm2-6b/poetry.lock @@ -1923,13 +1923,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/contrib/chatglm3-6b-32k/poetry.lock b/security_scanning/examples/models/contrib/chatglm3-6b-32k/poetry.lock index 4c278b6e731..4bcc69beb5d 100644 --- a/security_scanning/examples/models/contrib/chatglm3-6b-32k/poetry.lock +++ b/security_scanning/examples/models/contrib/chatglm3-6b-32k/poetry.lock @@ -1923,13 +1923,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/contrib/dbrx/poetry.lock b/security_scanning/examples/models/contrib/dbrx/poetry.lock index 4da25d198e3..ad71da46bb2 100644 --- a/security_scanning/examples/models/contrib/dbrx/poetry.lock +++ b/security_scanning/examples/models/contrib/dbrx/poetry.lock @@ -1805,13 +1805,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/contrib/deepseek_v1/poetry.lock b/security_scanning/examples/models/contrib/deepseek_v1/poetry.lock index 350b5d05f7a..a9fec7edd0d 100644 --- a/security_scanning/examples/models/contrib/deepseek_v1/poetry.lock +++ b/security_scanning/examples/models/contrib/deepseek_v1/poetry.lock @@ -1753,13 +1753,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/contrib/deepseek_v2/poetry.lock b/security_scanning/examples/models/contrib/deepseek_v2/poetry.lock index b5ea1f3c930..00f49a484b6 100644 --- a/security_scanning/examples/models/contrib/deepseek_v2/poetry.lock +++ b/security_scanning/examples/models/contrib/deepseek_v2/poetry.lock @@ -1753,13 +1753,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/contrib/falcon/poetry.lock b/security_scanning/examples/models/contrib/falcon/poetry.lock index 4887acc3b2a..734d8430c20 100644 --- a/security_scanning/examples/models/contrib/falcon/poetry.lock +++ b/security_scanning/examples/models/contrib/falcon/poetry.lock @@ -1874,13 +1874,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/contrib/gptj/poetry.lock b/security_scanning/examples/models/contrib/gptj/poetry.lock index 350b5d05f7a..a9fec7edd0d 100644 --- a/security_scanning/examples/models/contrib/gptj/poetry.lock +++ b/security_scanning/examples/models/contrib/gptj/poetry.lock @@ -1753,13 +1753,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/contrib/gptneox/poetry.lock b/security_scanning/examples/models/contrib/gptneox/poetry.lock index 350b5d05f7a..a9fec7edd0d 100644 --- a/security_scanning/examples/models/contrib/gptneox/poetry.lock +++ b/security_scanning/examples/models/contrib/gptneox/poetry.lock @@ -1753,13 +1753,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/contrib/grok/poetry.lock b/security_scanning/examples/models/contrib/grok/poetry.lock index abb8c091c6c..9c2dd8c4ee1 100644 --- a/security_scanning/examples/models/contrib/grok/poetry.lock +++ b/security_scanning/examples/models/contrib/grok/poetry.lock @@ -2718,13 +2718,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/contrib/internlm/poetry.lock b/security_scanning/examples/models/contrib/internlm/poetry.lock index 9175f8b9ec3..fa46e1e5db5 100644 --- a/security_scanning/examples/models/contrib/internlm/poetry.lock +++ b/security_scanning/examples/models/contrib/internlm/poetry.lock @@ -1831,13 +1831,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/contrib/jais/poetry.lock b/security_scanning/examples/models/contrib/jais/poetry.lock index 9175f8b9ec3..fa46e1e5db5 100644 --- a/security_scanning/examples/models/contrib/jais/poetry.lock +++ b/security_scanning/examples/models/contrib/jais/poetry.lock @@ -1831,13 +1831,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/contrib/mmdit/poetry.lock b/security_scanning/examples/models/contrib/mmdit/poetry.lock index ecf4a89bcca..9f2df3b29d1 100644 --- a/security_scanning/examples/models/contrib/mmdit/poetry.lock +++ b/security_scanning/examples/models/contrib/mmdit/poetry.lock @@ -1036,13 +1036,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/contrib/mpt/poetry.lock b/security_scanning/examples/models/contrib/mpt/poetry.lock index 350b5d05f7a..a9fec7edd0d 100644 --- a/security_scanning/examples/models/contrib/mpt/poetry.lock +++ b/security_scanning/examples/models/contrib/mpt/poetry.lock @@ -1753,13 +1753,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/contrib/opt/poetry.lock b/security_scanning/examples/models/contrib/opt/poetry.lock index 350b5d05f7a..a9fec7edd0d 100644 --- a/security_scanning/examples/models/contrib/opt/poetry.lock +++ b/security_scanning/examples/models/contrib/opt/poetry.lock @@ -1753,13 +1753,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/contrib/skywork/poetry.lock b/security_scanning/examples/models/contrib/skywork/poetry.lock index 9175f8b9ec3..fa46e1e5db5 100644 --- a/security_scanning/examples/models/contrib/skywork/poetry.lock +++ b/security_scanning/examples/models/contrib/skywork/poetry.lock @@ -1831,13 +1831,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/contrib/smaug/poetry.lock b/security_scanning/examples/models/contrib/smaug/poetry.lock index 9175f8b9ec3..fa46e1e5db5 100644 --- a/security_scanning/examples/models/contrib/smaug/poetry.lock +++ b/security_scanning/examples/models/contrib/smaug/poetry.lock @@ -1831,13 +1831,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/contrib/stdit/poetry.lock b/security_scanning/examples/models/contrib/stdit/poetry.lock index e951097a080..be2df63bc71 100644 --- a/security_scanning/examples/models/contrib/stdit/poetry.lock +++ b/security_scanning/examples/models/contrib/stdit/poetry.lock @@ -784,20 +784,20 @@ i18n = ["Babel (>=2.7)"] [[package]] name = "jsonschema" -version = "4.26.0" +version = "4.25.1" description = "An implementation of JSON Schema validation for Python" optional = false -python-versions = ">=3.10" +python-versions = ">=3.9" files = [ - {file = "jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce"}, - {file = "jsonschema-4.26.0.tar.gz", hash = "sha256:0c26707e2efad8aa1bfc5b7ce170f3fccc2e4918ff85989ba9ffa9facb2be326"}, + {file = "jsonschema-4.25.1-py3-none-any.whl", hash = "sha256:3fba0169e345c7175110351d456342c364814cfcf3b964ba4587f22915230a63"}, + {file = "jsonschema-4.25.1.tar.gz", hash = "sha256:e4a9655ce0da0c0b67a085847e00a3a51449e1157f4f75e9fb5aa545e122eb85"}, ] [package.dependencies] attrs = ">=22.2.0" jsonschema-specifications = ">=2023.03.6" referencing = ">=0.28.4" -rpds-py = ">=0.25.0" +rpds-py = ">=0.7.1" [package.extras] format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"] @@ -2202,13 +2202,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/core/commandr/poetry.lock b/security_scanning/examples/models/core/commandr/poetry.lock index 350b5d05f7a..a9fec7edd0d 100644 --- a/security_scanning/examples/models/core/commandr/poetry.lock +++ b/security_scanning/examples/models/core/commandr/poetry.lock @@ -1753,13 +1753,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/core/gemma/poetry.lock b/security_scanning/examples/models/core/gemma/poetry.lock index f939cd8ff0d..1df2fa84afd 100644 --- a/security_scanning/examples/models/core/gemma/poetry.lock +++ b/security_scanning/examples/models/core/gemma/poetry.lock @@ -2748,13 +2748,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/core/glm-4-9b/poetry.lock b/security_scanning/examples/models/core/glm-4-9b/poetry.lock index 4c278b6e731..4bcc69beb5d 100644 --- a/security_scanning/examples/models/core/glm-4-9b/poetry.lock +++ b/security_scanning/examples/models/core/glm-4-9b/poetry.lock @@ -1923,13 +1923,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/core/gpt/poetry.lock b/security_scanning/examples/models/core/gpt/poetry.lock index 9175f8b9ec3..fa46e1e5db5 100644 --- a/security_scanning/examples/models/core/gpt/poetry.lock +++ b/security_scanning/examples/models/core/gpt/poetry.lock @@ -1831,13 +1831,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/core/llama/poetry.lock b/security_scanning/examples/models/core/llama/poetry.lock index 377d1ba39fc..63461d261f1 100644 --- a/security_scanning/examples/models/core/llama/poetry.lock +++ b/security_scanning/examples/models/core/llama/poetry.lock @@ -1874,13 +1874,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/core/mamba/poetry.lock b/security_scanning/examples/models/core/mamba/poetry.lock index 2e5ea9eec8e..1357ac01b9f 100644 --- a/security_scanning/examples/models/core/mamba/poetry.lock +++ b/security_scanning/examples/models/core/mamba/poetry.lock @@ -1874,13 +1874,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/core/mixtral/poetry.lock b/security_scanning/examples/models/core/mixtral/poetry.lock index c000cfcdd5b..723716366eb 100644 --- a/security_scanning/examples/models/core/mixtral/poetry.lock +++ b/security_scanning/examples/models/core/mixtral/poetry.lock @@ -1315,13 +1315,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/core/mllama/poetry.lock b/security_scanning/examples/models/core/mllama/poetry.lock index 43a87a1c899..29d28af4494 100644 --- a/security_scanning/examples/models/core/mllama/poetry.lock +++ b/security_scanning/examples/models/core/mllama/poetry.lock @@ -1800,13 +1800,13 @@ typing-extensions = ">=4.12.0" [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/core/nemotron/poetry.lock b/security_scanning/examples/models/core/nemotron/poetry.lock index 350b5d05f7a..a9fec7edd0d 100644 --- a/security_scanning/examples/models/core/nemotron/poetry.lock +++ b/security_scanning/examples/models/core/nemotron/poetry.lock @@ -1753,13 +1753,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/core/phi/poetry.lock b/security_scanning/examples/models/core/phi/poetry.lock index 074cfc6f1b5..db07266c65b 100644 --- a/security_scanning/examples/models/core/phi/poetry.lock +++ b/security_scanning/examples/models/core/phi/poetry.lock @@ -1816,13 +1816,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/core/qwen/poetry.lock b/security_scanning/examples/models/core/qwen/poetry.lock index 7f07926aee8..95f4cf1e216 100644 --- a/security_scanning/examples/models/core/qwen/poetry.lock +++ b/security_scanning/examples/models/core/qwen/poetry.lock @@ -3452,13 +3452,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/core/qwen2audio/poetry.lock b/security_scanning/examples/models/core/qwen2audio/poetry.lock index 1e9fab8dc6c..6ff53e9c23f 100644 --- a/security_scanning/examples/models/core/qwen2audio/poetry.lock +++ b/security_scanning/examples/models/core/qwen2audio/poetry.lock @@ -1971,13 +1971,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/core/qwenvl/poetry.lock b/security_scanning/examples/models/core/qwenvl/poetry.lock index 6ca5cc273eb..a19aa6b0d02 100644 --- a/security_scanning/examples/models/core/qwenvl/poetry.lock +++ b/security_scanning/examples/models/core/qwenvl/poetry.lock @@ -2927,13 +2927,13 @@ telegram = ["requests"] [[package]] name = "transformers" -version = "5.0.0rc2" +version = "5.0.0rc1" description = "Transformers: the model-definition framework for state-of-the-art machine learning models in text, vision, audio, and multimodal models, for both inference and training." 
optional = false python-versions = ">=3.10.0" files = [ - {file = "transformers-5.0.0rc2-py3-none-any.whl", hash = "sha256:f8f2a14060ab11f20a0eec39d827af54c1589c327c5799d82808ae3f4167418a"}, - {file = "transformers-5.0.0rc2.tar.gz", hash = "sha256:9f2fa5e132433dd7eb910dc224b32de0baf758f3b6ffc918dbb632e0af85c07a"}, + {file = "transformers-5.0.0rc1-py3-none-any.whl", hash = "sha256:8b9604700769872cab4280dbcde201f557e93f72ee5a85c4592275ab4f15d330"}, + {file = "transformers-5.0.0rc1.tar.gz", hash = "sha256:1fdde557b96ef8ea277c45b8e0d558f1e167fe28a98593f4c4aec0277e335821"}, ] [package.dependencies] @@ -2951,15 +2951,15 @@ typer-slim = "*" [package.extras] accelerate = ["accelerate (>=1.1.0)"] -all = ["Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "av", "codecarbon (>=2.8.1)", "jinja2 (>=3.1.0)", "jmespath (>=1.0.1)", "kenlm", "kernels (>=0.10.2,<0.11)", "librosa", "mistral-common[opencv] (>=1.6.3)", "num2words", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "timm (>=1.0.23)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torchaudio", "torchvision"] +all = ["Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "av", "codecarbon (>=2.8.1)", "jinja2 (>=3.1.0)", "jmespath (>=1.0.1)", "kenlm", "kernels (>=0.10.2,<0.11)", "librosa", "mistral-common[opencv] (>=1.6.3)", "num2words", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torchaudio", "torchvision"] audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] benchmark = ["optimum-benchmark (>=0.3.0)"] chat-template = ["jinja2 (>=3.1.0)", "jmespath (>=1.0.1)"] codecarbon = ["codecarbon (>=2.8.1)"] deepspeed = ["accelerate (>=1.1.0)", "deepspeed (>=0.9.3)"] deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "libcst", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "openai (>=1.98.0)", "optuna", "parameterized (>=0.9)", "protobuf", "psutil", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "tensorboard", "timeout-decorator", "torch (>=2.2)", "uvicorn"] -dev = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "av", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "jinja2 (>=3.1.0)", "jmespath (>=1.0.1)", "kenlm", "kernels (>=0.10.2,<0.11)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "openai (>=1.98.0)", 
"optuna", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (>=1.0.23)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torch (>=2.2)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)", "uvicorn"] -dev-torch = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "kenlm", "kernels (>=0.10.2,<0.11)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "openai (>=1.98.0)", "optuna", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (>=1.0.23)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torch (>=2.2)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)", "uvicorn"] +dev = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "av", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "jinja2 (>=3.1.0)", "jmespath (>=1.0.1)", "kenlm", "kernels (>=0.10.2,<0.11)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "openai (>=1.98.0)", "optuna", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "ruff (==0.13.1)", "sacrebleu 
(>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torch (>=2.2)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)", "uvicorn"] +dev-torch = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "kenlm", "kernels (>=0.10.2,<0.11)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "openai (>=1.98.0)", "optuna", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torch (>=2.2)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)", "uvicorn"] ftfy = ["ftfy"] hf-xet = ["hf_xet"] hub-kernels = ["kernels (>=0.10.2,<0.11)"] @@ -2982,7 +2982,7 @@ sklearn = ["scikit-learn"] speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] testing = ["GitPython (<3.1.19)", "accelerate (>=1.1.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "libcst", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "openai (>=1.98.0)", "parameterized (>=0.9)", "psutil", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "tensorboard", "timeout-decorator", "torch (>=2.2)", "uvicorn"] tiktoken = ["blobfile", "tiktoken"] -timm = ["timm (>=1.0.23)"] +timm = ["timm (!=1.0.18,<=1.0.19)"] tokenizers = ["tokenizers (>=0.22.0,<=0.23.0)"] torch = ["accelerate (>=1.1.0)", "torch (>=2.2)"] torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] @@ -3074,13 +3074,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/core/recurrentgemma/poetry.lock b/security_scanning/examples/models/core/recurrentgemma/poetry.lock index fd983afd510..740545c551c 100644 --- a/security_scanning/examples/models/core/recurrentgemma/poetry.lock +++ b/security_scanning/examples/models/core/recurrentgemma/poetry.lock @@ -2515,13 +2515,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/models/core/whisper/poetry.lock b/security_scanning/examples/models/core/whisper/poetry.lock index fd7e7ec9f35..0f1e21f7810 100644 --- a/security_scanning/examples/models/core/whisper/poetry.lock +++ b/security_scanning/examples/models/core/whisper/poetry.lock @@ -2873,13 +2873,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/ngram/poetry.lock b/security_scanning/examples/ngram/poetry.lock index 3bfc89605a3..60b7e78b1ea 100644 --- a/security_scanning/examples/ngram/poetry.lock +++ b/security_scanning/examples/ngram/poetry.lock @@ -1821,13 +1821,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/quantization/poetry.lock b/security_scanning/examples/quantization/poetry.lock index 96d2ed90667..fa3dfba0e4f 100644 --- a/security_scanning/examples/quantization/poetry.lock +++ b/security_scanning/examples/quantization/poetry.lock @@ -1855,13 +1855,13 @@ telegram = ["requests"] [[package]] name = "transformers" -version = "5.0.0rc2" +version = "5.0.0rc1" description = "Transformers: the model-definition framework for state-of-the-art machine learning models in text, vision, audio, and multimodal models, for both inference and training." optional = false python-versions = ">=3.10.0" files = [ - {file = "transformers-5.0.0rc2-py3-none-any.whl", hash = "sha256:f8f2a14060ab11f20a0eec39d827af54c1589c327c5799d82808ae3f4167418a"}, - {file = "transformers-5.0.0rc2.tar.gz", hash = "sha256:9f2fa5e132433dd7eb910dc224b32de0baf758f3b6ffc918dbb632e0af85c07a"}, + {file = "transformers-5.0.0rc1-py3-none-any.whl", hash = "sha256:8b9604700769872cab4280dbcde201f557e93f72ee5a85c4592275ab4f15d330"}, + {file = "transformers-5.0.0rc1.tar.gz", hash = "sha256:1fdde557b96ef8ea277c45b8e0d558f1e167fe28a98593f4c4aec0277e335821"}, ] [package.dependencies] @@ -1879,15 +1879,15 @@ typer-slim = "*" [package.extras] accelerate = ["accelerate (>=1.1.0)"] -all = ["Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "av", "codecarbon (>=2.8.1)", "jinja2 (>=3.1.0)", "jmespath (>=1.0.1)", "kenlm", "kernels (>=0.10.2,<0.11)", "librosa", "mistral-common[opencv] (>=1.6.3)", "num2words", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "timm (>=1.0.23)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torchaudio", "torchvision"] +all = ["Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "av", "codecarbon (>=2.8.1)", "jinja2 (>=3.1.0)", "jmespath (>=1.0.1)", "kenlm", "kernels (>=0.10.2,<0.11)", "librosa", "mistral-common[opencv] (>=1.6.3)", "num2words", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torchaudio", "torchvision"] audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] benchmark = ["optimum-benchmark (>=0.3.0)"] chat-template = ["jinja2 (>=3.1.0)", "jmespath (>=1.0.1)"] codecarbon = ["codecarbon (>=2.8.1)"] deepspeed = ["accelerate (>=1.1.0)", "deepspeed (>=0.9.3)"] deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "libcst", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "openai (>=1.98.0)", "optuna", "parameterized (>=0.9)", "protobuf", "psutil", "pydantic (>=2)", 
"pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "tensorboard", "timeout-decorator", "torch (>=2.2)", "uvicorn"] -dev = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "av", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "jinja2 (>=3.1.0)", "jmespath (>=1.0.1)", "kenlm", "kernels (>=0.10.2,<0.11)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "openai (>=1.98.0)", "optuna", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (>=1.0.23)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torch (>=2.2)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)", "uvicorn"] -dev-torch = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "kenlm", "kernels (>=0.10.2,<0.11)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "openai (>=1.98.0)", "optuna", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (>=1.0.23)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torch (>=2.2)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)", "uvicorn"] +dev = ["GitPython (<3.1.19)", 
"GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "av", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "jinja2 (>=3.1.0)", "jmespath (>=1.0.1)", "kenlm", "kernels (>=0.10.2,<0.11)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "openai (>=1.98.0)", "optuna", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torch (>=2.2)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)", "uvicorn"] +dev-torch = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "kenlm", "kernels (>=0.10.2,<0.11)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "openai (>=1.98.0)", "optuna", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torch (>=2.2)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)", "uvicorn"] ftfy = ["ftfy"] hf-xet = ["hf_xet"] hub-kernels = ["kernels (>=0.10.2,<0.11)"] @@ -1910,7 +1910,7 @@ sklearn = ["scikit-learn"] speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] testing = ["GitPython (<3.1.19)", "accelerate (>=1.1.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "libcst", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "openai 
(>=1.98.0)", "parameterized (>=0.9)", "psutil", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "tensorboard", "timeout-decorator", "torch (>=2.2)", "uvicorn"] tiktoken = ["blobfile", "tiktoken"] -timm = ["timm (>=1.0.23)"] +timm = ["timm (!=1.0.18,<=1.0.19)"] tokenizers = ["tokenizers (>=0.22.0,<=0.23.0)"] torch = ["accelerate (>=1.1.0)", "torch (>=2.2)"] torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] @@ -1974,13 +1974,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/ray_orchestrator/poetry.lock b/security_scanning/examples/ray_orchestrator/poetry.lock index bfa715a5b17..b6c048c435b 100644 --- a/security_scanning/examples/ray_orchestrator/poetry.lock +++ b/security_scanning/examples/ray_orchestrator/poetry.lock @@ -730,20 +730,20 @@ type = ["mypy (<1.19)", "pytest-mypy (>=1.0.1)"] [[package]] name = "jsonschema" -version = "4.26.0" +version = "4.25.1" description = "An implementation of JSON Schema validation for Python" optional = false -python-versions = ">=3.10" +python-versions = ">=3.9" files = [ - {file = "jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce"}, - {file = "jsonschema-4.26.0.tar.gz", hash = "sha256:0c26707e2efad8aa1bfc5b7ce170f3fccc2e4918ff85989ba9ffa9facb2be326"}, + {file = "jsonschema-4.25.1-py3-none-any.whl", hash = "sha256:3fba0169e345c7175110351d456342c364814cfcf3b964ba4587f22915230a63"}, + {file = "jsonschema-4.25.1.tar.gz", hash = "sha256:e4a9655ce0da0c0b67a085847e00a3a51449e1157f4f75e9fb5aa545e122eb85"}, ] [package.dependencies] attrs = ">=22.2.0" jsonschema-specifications = ">=2023.03.6" referencing = ">=0.28.4" -rpds-py = ">=0.25.0" +rpds-py = ">=0.7.1" [package.extras] format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"] @@ -1891,13 +1891,13 @@ typing-extensions = ">=4.12.0" [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] @@ -1908,18 +1908,18 @@ zstd = ["backports-zstd (>=1.0.0)"] [[package]] name = "virtualenv" -version = "20.36.0" +version = "20.35.4" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.8" files = [ - {file = "virtualenv-20.36.0-py3-none-any.whl", hash = "sha256:e7ded577f3af534fd0886d4ca03277f5542053bedb98a70a989d3c22cfa5c9ac"}, - {file = "virtualenv-20.36.0.tar.gz", hash = "sha256:a3601f540b515a7983508113f14e78993841adc3d83710fa70f0ac50f43b23ed"}, + {file = "virtualenv-20.35.4-py3-none-any.whl", hash = "sha256:c21c9cede36c9753eeade68ba7d523529f228a403463376cf821eaae2b650f1b"}, + {file = "virtualenv-20.35.4.tar.gz", hash = "sha256:643d3914d73d3eeb0c552cbb12d7e82adf0e504dbf86a3182f8771a153a1971c"}, ] [package.dependencies] distlib = ">=0.3.7,<1" -filelock = {version = ">=3.20.1,<4", markers = "python_version >= \"3.10\""} +filelock = ">=3.12.2,<4" platformdirs = ">=3.9.1,<5" typing-extensions = {version = ">=4.13.2", markers = "python_version < \"3.11\""} diff --git a/security_scanning/examples/redrafter/poetry.lock b/security_scanning/examples/redrafter/poetry.lock index 9175f8b9ec3..fa46e1e5db5 100644 --- a/security_scanning/examples/redrafter/poetry.lock +++ b/security_scanning/examples/redrafter/poetry.lock @@ -1831,13 +1831,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/serve/poetry.lock b/security_scanning/examples/serve/poetry.lock index 6138f057f9a..e24bcb08ca9 100644 --- a/security_scanning/examples/serve/poetry.lock +++ b/security_scanning/examples/serve/poetry.lock @@ -2600,13 +2600,13 @@ test = ["coverage", "pytest", "pytest-cov"] [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/examples/trtllm-eval/poetry.lock b/security_scanning/examples/trtllm-eval/poetry.lock index 28c200491c5..f13e70ad4a4 100644 --- a/security_scanning/examples/trtllm-eval/poetry.lock +++ b/security_scanning/examples/trtllm-eval/poetry.lock @@ -3275,13 +3275,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] diff --git a/security_scanning/metadata.json b/security_scanning/metadata.json index 0acdbb16e92..567d8ffa7aa 100644 --- a/security_scanning/metadata.json +++ b/security_scanning/metadata.json @@ -1,4 +1,4 @@ { - "commit_hash": "b85c447ceb1ff91c5d4df6b71de2256a5fabfe9d", - "timestamp": "2026-01-08T02:42:38Z" + "commit_hash": "6095c80e560c612b8a8f4ff70e8930edf7798d21", + "timestamp": "2026-01-07T02:39:54Z" } diff --git a/security_scanning/poetry.lock b/security_scanning/poetry.lock index 4ef7b31b7ac..6d04c2d6793 100644 --- a/security_scanning/poetry.lock +++ b/security_scanning/poetry.lock @@ -1838,20 +1838,20 @@ files = [ [[package]] name = "jsonschema" -version = "4.26.0" +version = "4.25.1" description = "An implementation of JSON Schema validation for Python" optional = false -python-versions = ">=3.10" +python-versions = ">=3.9" files = [ - {file = "jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce"}, - {file = "jsonschema-4.26.0.tar.gz", hash = "sha256:0c26707e2efad8aa1bfc5b7ce170f3fccc2e4918ff85989ba9ffa9facb2be326"}, + {file = "jsonschema-4.25.1-py3-none-any.whl", hash = "sha256:3fba0169e345c7175110351d456342c364814cfcf3b964ba4587f22915230a63"}, + {file = "jsonschema-4.25.1.tar.gz", hash = "sha256:e4a9655ce0da0c0b67a085847e00a3a51449e1157f4f75e9fb5aa545e122eb85"}, ] [package.dependencies] attrs = ">=22.2.0" jsonschema-specifications = ">=2023.03.6" referencing = ">=0.28.4" -rpds-py = ">=0.25.0" +rpds-py = ">=0.7.1" [package.extras] format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"] @@ -3852,13 +3852,13 @@ xmp = ["defusedxml"] [[package]] name = "plotly" -version = "6.5.1" +version = "6.5.0" description = "An open-source interactive data visualization library for Python" optional = false python-versions = ">=3.8" files = [ - {file = "plotly-6.5.1-py3-none-any.whl", hash = 
"sha256:5adad4f58c360612b6c5ce11a308cdbc4fd38ceb1d40594a614f0062e227abe1"}, - {file = "plotly-6.5.1.tar.gz", hash = "sha256:b0478c8d5ada0c8756bce15315bcbfec7d3ab8d24614e34af9aff7bfcfea9281"}, + {file = "plotly-6.5.0-py3-none-any.whl", hash = "sha256:5ac851e100367735250206788a2b1325412aa4a4917a4fe3e6f0bc5aa6f3d90a"}, + {file = "plotly-6.5.0.tar.gz", hash = "sha256:d5d38224883fd38c1409bef7d6a8dc32b74348d39313f3c52ca998b8e447f5c8"}, ] [package.dependencies] @@ -5930,13 +5930,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, ] [package.extras] @@ -6339,4 +6339,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.13" -content-hash = "dab9694d64d1c91b512eb62bbd31da9d0cdb8c93e99941a7022f2f46aea905e3" +content-hash = "04c93699fd51f42c19b5111e408e5240d4dc0616a9d2501c7d159c01b3331ca2" diff --git a/security_scanning/pyproject.toml b/security_scanning/pyproject.toml index f8addf4b750..bc7ac1222ed 100644 --- a/security_scanning/pyproject.toml +++ b/security_scanning/pyproject.toml @@ -59,7 +59,7 @@ einops = "^0.8.1" flashinfer-python = ">=0.3.0,<0.4.0" xgrammar = "0.1.25" llguidance = "0.7.29" -jsonschema = "^4.26.0" +jsonschema = "^4.25.1" backoff = "^2.2.1" nvtx = "^0.2.14" matplotlib = "^3.10.8" @@ -73,7 +73,7 @@ tiktoken = "^0.12.0" blobfile = "^3.1.0" openai-harmony = "0.0.4" nvidia-cutlass-dsl = "4.3.4" -plotly = "^6.5.1" +plotly = "^6.5.0" numexpr = "<2.14.0" partial-json-parser = "^0.2.1.1.post7" torch-c-dlpack-ext = "0.1.3" diff --git a/security_scanning/triton_backend/poetry.lock b/security_scanning/triton_backend/poetry.lock index 2c000435171..6fa35b5ad71 100644 --- a/security_scanning/triton_backend/poetry.lock +++ b/security_scanning/triton_backend/poetry.lock @@ -982,13 +982,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." 
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
-    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
+    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
+    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
 ]

 [package.extras]
diff --git a/tensorrt_llm/_torch/auto_deploy/models/custom/modeling_nemotron_h.py b/tensorrt_llm/_torch/auto_deploy/models/custom/modeling_nemotron_h.py
index 259f997afd8..40dc215b34d 100644
--- a/tensorrt_llm/_torch/auto_deploy/models/custom/modeling_nemotron_h.py
+++ b/tensorrt_llm/_torch/auto_deploy/models/custom/modeling_nemotron_h.py
@@ -113,6 +113,11 @@ def __init__(self, config, layer_idx: int):
         A = torch.arange(1, self.num_heads + 1)
         self.A_log = nn.Parameter(torch.log(A))
         self.A_log._no_weight_decay = True
+        # Instead of recomputing `torch.exp(self.A_log.float())` on every forward pass, we will register a hook
+        # that sets this appropriately when loading weights.
+        # NOTE: we explicitly register this as a non-persistent buffer so that it does not appear in the state dict of
+        # this module, or an equivalent graph module trace from it, but still gets included in e.g. `to()` calls.
+        self.register_buffer("_minus_A", -A.float(), persistent=False)
         self.norm = MambaRMSNormGated(
             self.intermediate_size,
             eps=self.layer_norm_epsilon,
@@ -124,6 +129,8 @@
         self.out_proj = nn.Linear(self.intermediate_size, self.hidden_size, bias=config.use_bias)
         self.use_bias = config.use_bias

+        self.register_load_state_dict_post_hook(self._load_state_dict_post_hook)
+
     def torch_forward(self, input_states):
         batch_size, seq_len, _ = input_states.shape
         dtype = input_states.dtype
@@ -159,7 +166,7 @@ def torch_forward(self, input_states):
         )

         # 3. SSM transformation
-        A = -torch.exp(self.A_log.float())
+        A = self._minus_A
         y = torch.ops.auto_deploy.torch_ssm(
             hidden_states=hidden_states.view(batch_size, seq_len, -1, self.head_dim),
             A=A,
@@ -186,6 +193,10 @@ def torch_forward(self, input_states):
     def forward(self, hidden_states):
         return self.torch_forward(hidden_states)

+    @staticmethod
+    def _load_state_dict_post_hook(module, incompatible_keys) -> None:
+        module._minus_A.data = -torch.exp(module.A_log.float())
+

 class NemotronHRMSNorm(nn.Module):
     def __init__(self, hidden_size, eps=1e-6):
diff --git a/tensorrt_llm/_torch/auto_deploy/transform/library/sharding.py b/tensorrt_llm/_torch/auto_deploy/transform/library/sharding.py
index 30df2a6842c..deb82a43e66 100644
--- a/tensorrt_llm/_torch/auto_deploy/transform/library/sharding.py
+++ b/tensorrt_llm/_torch/auto_deploy/transform/library/sharding.py
@@ -646,6 +646,7 @@ def apply(self, gm: GraphModule, node: Node) -> None:
             gm,
             node,
             self.config,
+            self.mlp_type,
             scale_names=self.scale_names(),
         )

@@ -663,7 +664,7 @@ def scale_names(self) -> List[str]:
         return ["input_scale", "weight_scale", "alpha"]

     def apply(self, gm: GraphModule, node: Node) -> None:
-        _insert_sharded_moe(gm, node, self.config, scale_names=self.scale_names())
+        _insert_sharded_moe(gm, node, self.config, self.mlp_type, scale_names=self.scale_names())


 EP_SHARDING_RULES = [
diff --git a/tensorrt_llm/_torch/custom_ops/cute_dsl_custom_ops.py b/tensorrt_llm/_torch/custom_ops/cute_dsl_custom_ops.py
index 771e7ed7c8c..ae61e2b64c9 100644
--- a/tensorrt_llm/_torch/custom_ops/cute_dsl_custom_ops.py
+++ b/tensorrt_llm/_torch/custom_ops/cute_dsl_custom_ops.py
@@ -6,7 +6,7 @@
 from tensorrt_llm.logger import logger

 from ..._utils import get_sm_version
-from ...math_utils import ceil_div, pad_up
+from ...math_utils import pad_up
 from ..autotuner import (AutoTuner, ConstraintSpec, DistributedTuningStrategy,
                          DynamicTensorSpec, OptimizationProfile, TunableRunner,
                          TuningConfig)
@@ -314,16 +314,6 @@ def inputs_pre_hook(self, inputs: List[torch.Tensor]) -> List[torch.Tensor]:
                 num_non_exiting_tiles, global_sf)


-def get_dense_gemm_approximate_cta_nums(
-        M: int, N: int, tile_mn: Tuple[int, int],
-        cluster_shape_mn: Tuple[int, int]) -> int:
-    tile_m, tile_n = tile_mn
-    cluster_m, cluster_n = cluster_shape_mn
-    clustered_ctas_m = pad_up(ceil_div(M, tile_m), cluster_m)
-    clustered_ctas_n = pad_up(ceil_div(N, tile_n), cluster_n)
-    return clustered_ctas_m * clustered_ctas_n
-
-
 if IS_CUTLASS_DSL_AVAILABLE:
     import cutlass
@@ -370,6 +360,15 @@ def __init__(self,
     def unique_id(self):
         return (self.output_dtype, self.to_userbuffers, self.use_tvm_ffi)

+    def __hash__(self):
+        return hash(
+            (self.output_dtype, self.to_userbuffers, self.use_tvm_ffi))
+
+    def __eq__(self, other):
+        if not isinstance(other, self.__class__):
+            return False
+        return self.output_dtype == other.output_dtype and self.to_userbuffers == other.to_userbuffers and self.use_tvm_ffi == other.use_tvm_ffi
+
     def get_valid_tactics(
         self,
         inputs: List[torch.Tensor],
@@ -455,7 +454,6 @@ def get_valid_tactics(
             (4, 4),
         ]
         swap_ab_candidates = [True, False]
-        # prune: prefetch is beneficial only when K is large enough
         use_prefetch_candidates = [True, False]

         valid_tactics = []
@@ -486,19 +484,6 @@ def get_valid_tactics(
             b_major,
             c_major,
         ):
-            # Prefetch pruning to save tuning time
-            cta_nums = get_dense_gemm_approximate_cta_nums(
-                m, n, mma_tiler_mn, cluster_shape_mn)
-            cta_wave_ratio = cta_nums / torch.cuda.get_device_properties(
-            ).multi_processor_count
-            if use_prefetch and not any((
-                # CTA waves ratio between 0.5 and 1.0
-                0.5 < cta_wave_ratio < 1.0,
-                # K is large enough
-                real_k >= 8192,
-            )):
-                continue
-
             valid_tactics.append(
                 (mma_tiler_mn, cluster_shape_mn, swap_ab, use_prefetch))
diff --git a/tensorrt_llm/_torch/models/checkpoints/base_weight_mapper.py b/tensorrt_llm/_torch/models/checkpoints/base_weight_mapper.py
index 790be65eed5..4d78b3dcb19 100644
--- a/tensorrt_llm/_torch/models/checkpoints/base_weight_mapper.py
+++ b/tensorrt_llm/_torch/models/checkpoints/base_weight_mapper.py
@@ -29,6 +29,9 @@ def init_model_and_config(self, model: Union[nn.Module,
             raise ValueError("model must have a config attribute")

         self._tp_size = 1 if model.model_config.mapping.enable_attention_dp else model.model_config.mapping.tp_size
+        self._head_dim = model.config.head_dim if hasattr(
+            model.config, 'head_dim'
+        ) and model.config.head_dim is not None else model.config.hidden_size // model.config.num_attention_heads

         self.map_weights()

@@ -170,11 +173,3 @@ def model(self) -> Union[nn.Module, DecoderModelForCausalLM]:
         if self._model is None:
             raise RuntimeError("Weight mapper is not initialized")
         return self._model
-
-    @property
-    def _head_dim(self) -> int:
-        model = self.model
-        head_dim = model.config.head_dim if hasattr(
-            model.config, 'head_dim'
-        ) and model.config.head_dim is not None else model.config.hidden_size // model.config.num_attention_heads
-        return head_dim
diff --git a/tensorrt_llm/_torch/models/checkpoints/hf/qwen3vl_weight_mapper.py b/tensorrt_llm/_torch/models/checkpoints/hf/qwen3vl_weight_mapper.py
index 24a3602db94..41b3da875ea 100644
--- a/tensorrt_llm/_torch/models/checkpoints/hf/qwen3vl_weight_mapper.py
+++ b/tensorrt_llm/_torch/models/checkpoints/hf/qwen3vl_weight_mapper.py
@@ -1,8 +1,3 @@
-from transformers.models.qwen3_vl.configuration_qwen3_vl import (
-    Qwen3VLTextConfig,
-    Qwen3VLVisionConfig,
-)
-
 from tensorrt_llm._torch.models.checkpoints.hf.weight_mapper import HfWeightMapper
 from tensorrt_llm._torch.models.modeling_utils import register_mapper

@@ -11,17 +6,3 @@ class Qwen3VLHfWeightMapper(HfWeightMapper):

     def preprocess_weights(self, weights: dict) -> dict:
         return weights
-
-    @property
-    def _head_dim(self) -> int:
-        config = self.model.config
-        if (head_dim := getattr(config, "head_dim", None)) is not None:
-            return head_dim
-        if isinstance(config, Qwen3VLTextConfig):
-            num_heads = config.num_attention_heads
-        elif isinstance(config, Qwen3VLVisionConfig):
-            num_heads = config.num_heads
-        else:
-            raise TypeError(f"Unexpected config class {type(config).__name__}.")
-
-        return config.hidden_size // num_heads
diff --git a/tensorrt_llm/_torch/models/checkpoints/mistral/config_loader.py b/tensorrt_llm/_torch/models/checkpoints/mistral/config_loader.py
index c679734fcf8..b72cb6da38d 100644
--- a/tensorrt_llm/_torch/models/checkpoints/mistral/config_loader.py
+++ b/tensorrt_llm/_torch/models/checkpoints/mistral/config_loader.py
@@ -103,14 +103,17 @@ def _remap_mistral_yarn_args(config: dict) -> dict:
         "apply_scale": "apply_yarn_scaling",
     }
     yarn_config = config.get("yarn") or {}
-    config["rope_scaling"] = {
+    config["rope_parameters"] = {
         "rope_type": "yarn",
         "mscale_all_dim": 1,
     }
+    if rope_theta := config.pop("rope_theta", None):
+        config["rope_parameters"]["rope_theta"] = rope_theta
+
     for old_name, new_name in yarn_config_map.items():
         if old_name in yarn_config:
-            config["rope_scaling"][new_name] = yarn_config.pop(old_name)
+            config["rope_parameters"][new_name] = yarn_config.pop(old_name)

     assert len(yarn_config) == 0, f"Unparsed yarn config: {yarn_config}"
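Aside: the modeling_nemotron_h.py change above caches `-exp(A_log)` as a non-persistent buffer and refreshes it from a load-state-dict post-hook. A minimal, self-contained sketch of that pattern follows; the class and buffer names here are illustrative, not taken from the patch.

import torch
import torch.nn as nn


class CachedMinusA(nn.Module):
    """Illustrative module: caches a tensor derived from a parameter."""

    def __init__(self, num_heads: int):
        super().__init__()
        A = torch.arange(1, num_heads + 1).float()
        self.A_log = nn.Parameter(torch.log(A))
        # persistent=False keeps the buffer out of state_dict() while it
        # still follows the module through .to(device)/.to(dtype) calls.
        self.register_buffer("_minus_A", -A, persistent=False)
        # Refresh the cache once after checkpoint weights land, instead of
        # calling torch.exp() on every forward pass.
        self.register_load_state_dict_post_hook(self._refresh_minus_A)

    @staticmethod
    def _refresh_minus_A(module, incompatible_keys) -> None:
        # Runs after load_state_dict(); A_log now holds checkpoint values.
        module._minus_A.data = -torch.exp(module.A_log.float())

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return x * self._minus_A  # cached value; no exp() in the hot path

The trade-off is one extra hook invocation per checkpoint load in exchange for removing an elementwise exp from every forward call, and the buffer never appears in saved checkpoints.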
diff --git a/tensorrt_llm/_torch/models/modeling_mistral.py b/tensorrt_llm/_torch/models/modeling_mistral.py index 99ff8169c12..ea06b5e1001 100644 --- a/tensorrt_llm/_torch/models/modeling_mistral.py +++ b/tensorrt_llm/_torch/models/modeling_mistral.py @@ -46,7 +46,6 @@ MultimodalPlaceholderPlacement, TextPrompt, register_input_processor) from tensorrt_llm.inputs.multimodal import MultimodalParams -from tensorrt_llm.inputs.utils import encode_base64_image from tensorrt_llm.llmapi import SamplingParams from tensorrt_llm.logger import logger @@ -59,28 +58,16 @@ def __init__( layer_idx: int | None = None, ): config = model_config.pretrained_config - rope_params = RopeParams.from_config(config) - rope_params_section = getattr(config, "rope_scaling", None) or getattr( - config, "rope_parameters", None) - rope_type = getattr(rope_params_section, "rope_type", None) - if rope_type == "yarn": - pos_embd_params = PositionalEmbeddingParams( - type=PositionEmbeddingType.yarn, - rope=rope_params, - is_neox=False) - else: - pos_embd_params = PositionalEmbeddingParams( - type=PositionEmbeddingType.rope_gpt_neox, - rope=rope_params, - ) - super().__init__( hidden_size=config.hidden_size, num_attention_heads=config.num_attention_heads, num_key_value_heads=config.num_key_value_heads, max_position_embeddings=config.max_position_embeddings, bias=False, - pos_embd_params=pos_embd_params, + pos_embd_params=PositionalEmbeddingParams( + type=PositionEmbeddingType.rope_gpt_neox, + rope=RopeParams.from_config(config), + ), layer_idx=layer_idx, dtype=config.torch_dtype, config=model_config, @@ -279,18 +266,20 @@ def _get_num_multimodal_tokens(self, image_sizes): } def get_num_tokens_per_image(self, image_sizes): + # FIXME avoid double loading with custom loader h, w = image_sizes ncols, nrows = self.image_processor._image_to_num_tokens( Image.new("RGB", (w, h))) return ncols * nrows + nrows - def __call__(self, text, images, **kwargs): - mm_items = [] - if images: - mm_items = [{ - "type": "image", - "base64": encode_base64_image(image) - } for image in images] + def __call__(self, text, images, media, **kwargs): + assert media is not None + if isinstance(media, str): + media = [media] + + mm_items = [{"type": "image_url", "image_url": url} for url in media] + + logger.debug(f"text: {text}") conversation = [{ "role": "user", @@ -303,20 +292,19 @@ def __call__(self, text, images, **kwargs): encoded = self.tokenizer.transformers_tokenizer.apply_chat_template( conversation, tokenize=True, return_dict=True, return_tensors='pt') + logger.debug( + f"encoded.pixel_values.shape: {encoded.pixel_values.shape}, encoded.input_ids: {encoded.input_ids[0][-20:]}" + ) + logger.debug( + f"encoded.input_ids list: {self.tokenizer.transformers_tokenizer.apply_chat_template(conversation)}" + ) + processed = { "input_ids": encoded.input_ids, + "pixel_values": encoded.pixel_values.to(self.dtype), + "attention_mask": encoded.attention_mask, + "image_sizes": torch.tensor([encoded.pixel_values.shape[2:]]) } - - # text-only mode for VLM - if "pixel_values" in encoded: - processed.update({ - "pixel_values": - encoded.pixel_values.to(self.dtype), - "attention_mask": - encoded.attention_mask, - "image_sizes": - torch.tensor([encoded.pixel_values.shape[2:]]) - }) return processed @@ -388,6 +376,7 @@ def __call__( self, inputs: TextPrompt, sampling_params: SamplingParams ) -> Tuple[List[int], ExtraProcessedInputs | None]: images = inputs.get("multi_modal_data", {}).get("image") + mm_processor_kwargs = inputs.get("mm_processor_kwargs", {}) 
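# --- illustrative aside (not part of the patch) ---
# The get_num_tokens_per_image() context above returns ncols * nrows + nrows:
# the processor tiles the image into an ncols x nrows patch grid, spends one
# token per patch, and (consistent with Mistral/Pixtral-style image
# tokenization -- an assumption here, not stated by the patch) one break
# token at the end of each patch row.
ncols, nrows = 4, 3          # hypothetical 4x3 patch grid
num_tokens = ncols * nrows + nrows
assert num_tokens == 15      # 12 patch tokens + 3 row-break tokens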
do_rescale = getattr(self.processor.image_processor, "do_rescale", False) if images is not None and isinstance(images[0], torch.Tensor): @@ -395,15 +384,18 @@ def __call__( # format is "pt" (pytorch tensors), but not for "pil" (PIL images). do_rescale = False - if images is not None: + if mm_processor_kwargs: + # Currently, we only support image modality in MistralCommonImageProcessor. processed = self.processor( text=inputs["prompt"], images=images, do_rescale=do_rescale, + **mm_processor_kwargs, ) else: processed = self.text_processor( text=inputs["prompt"], + images=images, do_rescale=do_rescale, ) input_ids = processed.pop("input_ids").tolist()[0] diff --git a/tensorrt_llm/_torch/models/modeling_qwen3vl.py b/tensorrt_llm/_torch/models/modeling_qwen3vl.py index d073f6745b7..f89d801f94d 100644 --- a/tensorrt_llm/_torch/models/modeling_qwen3vl.py +++ b/tensorrt_llm/_torch/models/modeling_qwen3vl.py @@ -25,7 +25,6 @@ MultimodalPlaceholderPlacement, TextPrompt, register_input_processor, - support_multimodal_disaggregated, ) from ...inputs.multimodal import MultimodalParams from ...logger import logger @@ -351,85 +350,6 @@ def __call__( "multimodal_data": multimodal_data, } - def get_prompt_token_ids( - self, inputs: TextPrompt, mm_handles: List[Dict[str, Any]] - ) -> Tuple[List[int], List[int], List[int]]: - """ - Build input token ids with multimodal placeholders expanded to the number of MM tokens. - - Args: - inputs: Text prompt input container. Must contain a non-empty prompt string. - mm_handles: List of multimodal embedding handles. Currently only a single handle is supported. - - Returns: - Tuple[List[int], List[int], List[int]]: - - expanded_ids: token ids with each image token expanded to a placeholder repeated per MM token - - mm_token_length: per-image MM token lengths - - mm_token_offsets: start offsets (positions) for each image's MM tokens within expanded_ids - """ - # TODO: Move this function to the base input processor class when extending for more models - text_prompt = inputs.get("prompt") - if not text_prompt: - raise ValueError("Text prompt is required but not provided") - - if not isinstance(mm_handles, list): - raise TypeError("mm_handles must be a list") - - if len(mm_handles) > 1: - # TODO: only support single multimodal item within a request for now - raise NotImplementedError("Only one mm_handle is supported for Qwen3 VL for now") - - hidden_size = mm_handles[0]["tensor_size"][1] - num_deepstack_levels = len(self.config.vision_config.deepstack_visual_indexes) - # This is because, unlike previous Qwen VL models, the embeddings are concatenated with - # feature maps from deepstack layers. - expected_size = self.config.text_config.hidden_size * (1 + num_deepstack_levels) - if hidden_size != expected_size: - raise RuntimeError( - f"Expected multimodal embedding to have hidden size {expected_size}, got {hidden_size}." - ) - - input_ids = self.tokenizer(text_prompt, return_tensors="pt").input_ids[0] - - # TODO: what about `video_token_id`? 
- image_token_index = self.config.image_token_id - - image_mask = input_ids == image_token_index - image_positions = torch.where(image_mask)[0] - num_images = len(image_positions) - assert num_images == len(mm_handles), "Number of images must match number of mm_handles" - total_mm_tokens = sum(mm_handle["tensor_size"][0] for mm_handle in mm_handles) - final_length = len(input_ids) - num_images + total_mm_tokens - # Create output tensor - expanded_ids = torch.empty(final_length, dtype=input_ids.dtype) - placeholder_id = self.tllm_multimodal_token_id - - # Fill the expanded sequence - write_pos = 0 - image_cnt = 0 - mm_token_length = [] - mm_token_offsets = [] - for read_pos in range(len(input_ids)): - if input_ids[read_pos] == image_token_index: - # Replace with placeholder id - mm_token_num = mm_handles[image_cnt]["tensor_size"][0] - expanded_ids[write_pos : write_pos + mm_token_num] = placeholder_id - mm_token_offsets.append(write_pos) - mm_token_length.append(mm_token_num) - write_pos += mm_token_num - image_cnt += 1 - else: - # Copy text token as-is - expanded_ids[write_pos] = input_ids[read_pos] - write_pos += 1 - - assert write_pos == final_length, f"Write position mismatch: {write_pos} != {final_length}" - assert mm_token_length[-1] + mm_token_offsets[-1] <= final_length, ( - f"mm_token_length[-1] + mm_token_offsets[-1] ({mm_token_length[-1] + mm_token_offsets[-1]}) should be less " - f"than or equal to final_length ({final_length})" - ) - return expanded_ids.to(torch.int32).tolist(), mm_token_length, mm_token_offsets - class Qwen3VLVisionAttention(Qwen2_5_VLVisionAttention): def __init__(self, model_config, layer_idx): @@ -905,7 +825,6 @@ def __init__( llm_model_config.pretrained_config.architectures = ["Qwen3MoeForCausalLM"] else: raise ValueError(f"Unsupported architecture: {self.original_arch}") - # Qwen3ForCausalLM. self.llm = AutoModelForCausalLM.from_config(llm_model_config) if not _is_disagg(): @@ -1034,16 +953,22 @@ def forward( # NOTE: Qwen*-VL series has mrope_config even on the text-only prompts, # so we need to separate the mm_multimodal_params from the text-only prompts. - mm_multimodal_params = self._get_requests_with_mm_data(multimodal_params) + mm_multimodal_params = [ + multimodal_param + for multimodal_param in multimodal_params + if multimodal_param.multimodal_data.get("image", {}).get("pixel_values") is not None + or multimodal_param.multimodal_data.get("video", {}).get("pixel_values_videos") + is not None + ] if len(mm_multimodal_params) > 0: if not _is_disagg(): mm_embeds = get_multimodal_embeddings( encoder_forward_fn=self.mm_encoder.forward, multimodal_params=mm_multimodal_params, ) - elif not getattr(self, "support_mm_disagg", False): + else: raise NotImplementedError( - f"{type(self)} does not support disaggregated inference yet. Please unset " + "Qwen3VLModel does not support disaggregated inference yet. Please unset " "the TLLM_MULTIMODAL_DISAGGREGATED environment variable, or set it to '0'." ) mm_embeds = find_input_mm_embeds(mm_embeds, mm_multimodal_params) @@ -1083,24 +1008,7 @@ def forward( logger.debug(f"output shape: {output_prob.shape}") return output_prob - def _get_requests_with_mm_data(self, multimodal_params): - mm_multimodal_params = [] - for multimodal_param in multimodal_params: - data = multimodal_param.multimodal_data - if ( - # The first 2 conditions check whether there is input on which inference should be run. 
- data.get("image", {}).get("pixel_values") is not None - or data.get("video", {}).get("pixel_values_videos") is not None - # This condition corresponds to when the embeddings are already populated, as is e.g. - # the case in EPD disagg in the prefill worker. - or data.get("multimodal_embedding") - ): - mm_multimodal_params.append(multimodal_param) - - return mm_multimodal_params - -@support_multimodal_disaggregated @register_vision_encoder(Qwen3VisionModelBase, vlm_base_model=Qwen3VisionModel) @register_auto_model("Qwen3VLForConditionalGeneration") @register_input_processor( diff --git a/tensorrt_llm/_torch/models/modeling_speculative.py b/tensorrt_llm/_torch/models/modeling_speculative.py index 312d5b1dcaf..dc4b3b1d545 100755 --- a/tensorrt_llm/_torch/models/modeling_speculative.py +++ b/tensorrt_llm/_torch/models/modeling_speculative.py @@ -953,14 +953,6 @@ def forward( hidden_states = hidden_states[:attn_metadata.num_tokens] if self.draft_model is not None: - # For one-model speculative decoding with PP, only the last PP rank - # has valid hidden_states from the target model. The spec_worker (which - # runs the draft model loop) should only run on the last PP rank. - # Non-last PP ranks return None and let the PP sync handle the results. - mapping = self.model.model_config.mapping - if mapping.has_pp() and not mapping.is_last_pp_rank(): - return None - # get logits logits = self.logits_processor.forward( hidden_states[spec_metadata.gather_ids], diff --git a/tensorrt_llm/_torch/speculative/interface.py b/tensorrt_llm/_torch/speculative/interface.py index 99e9468f0c9..59a5e0129cf 100644 --- a/tensorrt_llm/_torch/speculative/interface.py +++ b/tensorrt_llm/_torch/speculative/interface.py @@ -141,9 +141,8 @@ def extend_ctx(self, attention_backend: Type[AttentionBackend]): # 1-model has separate logic for handling draft tokens return False - xqa_supported = get_sm_version() < 120 return not issubclass(attention_backend, - TrtllmAttention) or not xqa_supported + TrtllmAttention) or get_sm_version() < 90 def attention_need_spec_dec_mode( self, @@ -162,16 +161,14 @@ def attention_need_spec_dec_mode( """ is_trtllm_attention = issubclass(attention_backend, TrtllmAttention) - # Always use the multi-token query mode for 1-model if the kernels are available. - xqa_supported = get_sm_version() < 120 - use_case_1 = self.use_one_engine() and xqa_supported + # Always use the multi-token query mode for 1-model. # For 2-model, we need to enable it when we process multiple tokens at once. This occurs with # the target model (verification) or on the first draft for CDL based speculation. 
-        use_case_2 = not self.use_one_engine() and (
-            not is_draft_model or
-            (spec_resource_manager is not None
-             and spec_resource_manager.is_first_draft
-             and use_chain_drafter)) and is_trtllm_attention
+        use_case_1 = self.is_eagle3_one_model()
+        use_case_2 = (not is_draft_model or
+                      (spec_resource_manager is not None
+                       and spec_resource_manager.is_first_draft
+                       and use_chain_drafter)) and is_trtllm_attention

         return use_case_1 or use_case_2
diff --git a/tensorrt_llm/evaluate/lm_eval.py b/tensorrt_llm/evaluate/lm_eval.py
index 4a877d75f4c..a3a59c3f5f3 100644
--- a/tensorrt_llm/evaluate/lm_eval.py
+++ b/tensorrt_llm/evaluate/lm_eval.py
@@ -52,9 +52,7 @@ def __init__(self,
                  llm: Union[LLM, PyTorchLLM],
                  sampling_params: Optional[SamplingParams] = None,
                  streaming: bool = False,
-                 chat_template_kwargs: Optional[dict[str, Any]] = None,
-                 model_type: str | None = None,
-                 is_force_single_image: bool = False):
+                 chat_template_kwargs: Optional[dict[str, Any]] = None):
         super().__init__()
         self.llm = llm
         self.sampling_params = sampling_params
@@ -165,9 +163,7 @@ def __init__(self,
                  sampling_params: Optional[SamplingParams] = None,
                  streaming: bool = False,
                  max_images: int = 999,
-                 chat_template_kwargs: Optional[dict[str, Any]] = None,
-                 model_type: str | None = None,
-                 is_force_single_image: bool = False):
+                 chat_template_kwargs: Optional[dict[str, Any]] = None):
         """
         Initialize the multimodal wrapper.
@@ -183,9 +179,7 @@ def __init__(self,
         self.MULTIMODAL = True
         self.max_images = max_images
         self.chat_template_kwargs = chat_template_kwargs
-        self.model_type = model_type if model_type is not None else self._get_model_type(
-            llm)
-        self.is_force_single_image = is_force_single_image
+        self.model_type = self._get_model_type(llm)

         # NOTE: TRT-LLM does not yet support interleaved text and images; instead, image placeholders are added at the beginning or at the end of the text.
         # Until interleaving is supported, this is set to False.
@@ -293,14 +287,9 @@ def generate_until(self, requests, disable_tqdm: bool = False) -> List[str]:
                 prompt = prompt_inputs(prompt)

                 # NOTE: Convert RGBA format to RGB format
-                if self.is_force_single_image:
-                    # NOTE: This is a workaround to force a single image for models that only support single-image input.
-                    images = [convert_image_mode(media_data["visual"][0], "RGB")]
-                else:
-                    images = [
-                        convert_image_mode(img, "RGB")
-                        for img in media_data["visual"]
-                    ]
+                images = [
+                    convert_image_mode(img, "RGB") for img in media_data["visual"]
+                ]
                 prompt["multi_modal_data"] = {"image": images}

                 sampling_params = self._get_sampling_params(gen_kwargs)
@@ -440,18 +429,14 @@ def evaluate(self,
                  llm: Union[LLM, PyTorchLLM],
                  sampling_params: Optional[SamplingParams] = None,
                  streaming: bool = False,
-                 scores_filter: str = None,
-                 model_type: str = None,
-                 is_force_single_image: bool = False) -> float:
+                 scores_filter: str = None) -> float:
         import lm_eval
         lm_cls = MultimodalLmEvalWrapper if self.MULTIMODAL else LmEvalWrapper
         results = lm_eval.evaluate(
             lm=lm_cls(llm,
                       sampling_params=sampling_params,
                       streaming=streaming,
-                      chat_template_kwargs=self.chat_template_kwargs,
-                      model_type=model_type,
-                      is_force_single_image=is_force_single_image),
+                      chat_template_kwargs=self.chat_template_kwargs),
             task_dict=self.task_dict,
             limit=self.num_samples,
             apply_chat_template=self.apply_chat_template,
diff --git a/tensorrt_llm/inputs/utils.py b/tensorrt_llm/inputs/utils.py
index bbbd5f4f8f2..a6f7e49fa8f 100644
--- a/tensorrt_llm/inputs/utils.py
+++ b/tensorrt_llm/inputs/utils.py
@@ -774,6 +774,12 @@ def convert_to_conversation_message(
                                  mm_placeholder_counts=[mm_placeholder_counts])

     input = {"prompt": prompt}
+    # When the tokenizer is a MistralTokenizer, keep the source media so the processor can handle it later.
+    from tensorrt_llm._torch.models.checkpoints.mistral.tokenizer import \
+        MistralTokenizer
+    if isinstance(tokenizer, MistralTokenizer):
+        input["mm_processor_kwargs"] = {"media": media}
+
     if mm_placeholder_counts:
         if mm_embeddings is not None:
             input[
diff --git a/tensorrt_llm/llmapi/llm.py b/tensorrt_llm/llmapi/llm.py
index ac869d765a5..6d3410bf3c2 100644
--- a/tensorrt_llm/llmapi/llm.py
+++ b/tensorrt_llm/llmapi/llm.py
@@ -491,8 +491,8 @@ def generate_async(
         elif 'multi_modal_embeddings' in inputs:
             mm_embedding_info = inputs['multi_modal_embeddings']
             prompt_token_ids, extra_processed_inputs = cast(
                 BaseMultimodalInputProcessor,
                 self.input_processor).attach_multimodal_embeddings(
                     inputs, mm_embedding_info, sampling_params)
         else:
             with nvtx_range_debug("input_processor"):
diff --git a/tensorrt_llm/version.py b/tensorrt_llm/version.py
index 434ba8f8e28..504baf79de2 100644
--- a/tensorrt_llm/version.py
+++ b/tensorrt_llm/version.py
@@ -12,4 +12,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "1.2.0rc8" +__version__ = "1.2.0rc7" diff --git a/tests/integration/defs/accuracy/accuracy_core.py b/tests/integration/defs/accuracy/accuracy_core.py index e30c6e2c2cb..f96ac7d6184 100644 --- a/tests/integration/defs/accuracy/accuracy_core.py +++ b/tests/integration/defs/accuracy/accuracy_core.py @@ -402,8 +402,6 @@ class MMMU(AccuracyTask): is_multimodal=True, apply_chat_template=True) - EVALUATE_KWARGS = dict(model_type=None, is_force_single_image=False) - class PassKeyRetrieval64k(AccuracyTask): DATASET = "passkey_retrieval_64k" diff --git a/tests/integration/defs/accuracy/references/mmlu.yaml b/tests/integration/defs/accuracy/references/mmlu.yaml index 9cbd7a9f731..a0e38d67c1f 100644 --- a/tests/integration/defs/accuracy/references/mmlu.yaml +++ b/tests/integration/defs/accuracy/references/mmlu.yaml @@ -345,9 +345,9 @@ mistralai/Mistral-Nemo-12b-Base: - quant_algo: FP8 accuracy: 69.66 mistral/Mistral-Large-3-675B: - - accuracy: 85.30 + - accuracy: 87.54 - spec_dec_algo: Eagle - accuracy: 85.30 + accuracy: 87.54 nvidia/Nemotron-Super-V3: - accuracy: 81.07 - quant_algo: NVFP4 diff --git a/tests/integration/defs/accuracy/references/mmmu.yaml b/tests/integration/defs/accuracy/references/mmmu.yaml index 37819c3f14b..a2fb8f4a777 100644 --- a/tests/integration/defs/accuracy/references/mmmu.yaml +++ b/tests/integration/defs/accuracy/references/mmmu.yaml @@ -25,5 +25,4 @@ microsoft/Phi-4-multimodal-instruct: Qwen/Qwen3-VL-30B-A3B-Instruct: - accuracy: 55.33 mistral/Mistral-Large-3-675B: -# Mistral Large 3 675B only supports single image input, so accuracy is lower. - - accuracy: 47 + - accuracy: 60.00 diff --git a/tests/integration/defs/accuracy/test_llm_api_pytorch_multimodal.py b/tests/integration/defs/accuracy/test_llm_api_pytorch_multimodal.py index c3a812b195b..78e0f3e401c 100644 --- a/tests/integration/defs/accuracy/test_llm_api_pytorch_multimodal.py +++ b/tests/integration/defs/accuracy/test_llm_api_pytorch_multimodal.py @@ -293,19 +293,8 @@ class TestMistralLarge3_675B(LlmapiAccuracyTestHarness): ], ) def test_nvfp4_4gpus( - self, - tp_size, - pp_size, - ep_size, - attention_dp, - cuda_graph, - overlap_scheduler, - moe_backend, - mocker, + self, tp_size, pp_size, ep_size, attention_dp, cuda_graph, overlap_scheduler, moe_backend ): - mocker.patch.dict( - MMMU.EVALUATE_KWARGS, {"model_type": "mistral_large_3", "is_force_single_image": True} - ) pytorch_config = dict( disable_overlap_scheduler=not overlap_scheduler, cuda_graph_config=CudaGraphConfig() if cuda_graph else None, @@ -326,4 +315,4 @@ def test_nvfp4_4gpus( kv_cache_config=kv_cache_config, ) as llm: task = MMMU(self.MODEL_NAME) - task.evaluate(llm, sampling_params=self.sampling_params) + task.evaluate(llm, sampling_params=self.sampling_params, model_type="mistral_large_3") diff --git a/tests/integration/defs/perf/disagg/README.md b/tests/integration/defs/perf/disagg/README.md index 5921900b707..28ba839c6e7 100644 --- a/tests/integration/defs/perf/disagg/README.md +++ b/tests/integration/defs/perf/disagg/README.md @@ -132,141 +132,6 @@ poetry run pytest --disagg test_disagg.py -s -vv -m accuracy poetry run pytest --disagg test_disagg.py -s -vv -k "deepseek-r1-fp4_1k1k" ``` -## Batch Job Submission - -The framework supports automatic batch job submission to maximize parallelism in SLURM cluster environments. Instead of submitting jobs one-by-one, it groups test cases into batches and submits entire batches when needed. 
- -### Quick Start - -**Default batch size (5 jobs per batch):** -```bash -# Run all tests with default batching -poetry run pytest --disagg test_disagg.py -s -vv - -# Run with test list -poetry run pytest --disagg test_disagg.py -s -vv --disagg-test-list=./testlist/all.txt -``` - -**Custom batch size:** -```bash -# Set batch size via command line -poetry run pytest --disagg test_disagg.py -s -vv --disagg-batch-size=10 - -# Set batch size via environment variable -export DISAGG_BATCH_SIZE=20 -poetry run pytest --disagg test_disagg.py -s -vv - -# Submit all jobs at once (unlimited batch) -poetry run pytest --disagg test_disagg.py -s -vv --disagg-batch-size=0 -``` - -### How Batch Submission Works - -``` -Pytest Collection Phase: - - Collects all test cases (e.g., 100 tests) - - BatchManager splits them into batches (e.g., 20 batches of 5) - -Pytest Execution Phase: - Test 0 runs: - -> Triggers submission of Batch 0 (jobs 0-4) - -> Waits for job 0 to complete - - Test 1-4 run: - -> Batch 0 already submitted, directly wait for completion - - Test 5 runs: - -> Triggers submission of Batch 1 (jobs 5-9) - -> Waits for job 5 to complete - - ... and so on -``` - -### Key Benefits - -- **Parallel Execution**: All jobs in a batch run simultaneously on SLURM cluster -- **Reduced Wait Time**: Total time ≈ MAX(job time) instead of SUM(job times) -- **Automatic Management**: No need to manually split test lists -- **Lazy Loading**: Only submits batches when needed - -### Configuration Options - -**Priority**: Command line option > Environment variable > Default (5) - -**Examples:** - -```bash -# Small batch for quick testing -poetry run pytest --disagg test_disagg.py -s -vv --disagg-batch-size=3 \ - --disagg-test-list=./testlist/debug.txt - -# Large batch for production -poetry run pytest --disagg test_disagg.py -s -vv --disagg-batch-size=50 \ - --disagg-test-list=./testlist/all.txt - -# Submit all at once -poetry run pytest --disagg test_disagg.py -s -vv --disagg-batch-size=0 -``` - -### Timeout Configuration - -The default timeout for waiting for job completion is **10 hours (36000 seconds)**, which accounts for: -- SLURM queue wait time -- Job execution time -- Buffer for delays - -### Performance Comparison - -**Before (Sequential Submission):** -``` -Case 1: submit + wait (1.5h) = 1.5h -Case 2: submit + wait (1.5h) = 1.5h -Case 3: submit + wait (1.5h) = 1.5h -... -Total: 50 × 1.5h = 75 hours -``` - -**After (Batch Submission, batch_size=50):** -``` -Batch 0 (50 jobs): submitted in parallel - Case 1: wait (1.5h) - Case 2-50: wait (0s, already done) - -Total: ~1.5 hours -``` - -**Speedup: 50x** - -### Troubleshooting - -**Check BatchManager initialization:** -``` -====================================================================== -Batch Manager Initialized -Batch size: 5 jobs per batch -====================================================================== - -Total test configs: 20 -Total batches: 4 -``` - -**Monitor batch submission:** -``` -====================================================================== -Submitting Batch 0 -Range: [0:5] (5 jobs) -====================================================================== - - [ 1/5] Job 1234 <- test_config_id_1 - [ 2/5] Job 1235 <- test_config_id_2 - ... 
-```
-
-**If jobs time out frequently:**
-- Check SLURM queue status
-- Consider reducing batch size to avoid resource contention
-- Verify that the timeout (36000s) is sufficient for your workload
-
 ## Test Naming Convention

 Tests are automatically named using the format:
@@ -328,7 +193,6 @@ Test results are saved to:
 - `GPU_TYPE`: Current GPU type (default: GB200)
 - `OUTPUT_PATH`: Directory for test results and logs
 - `WORK_DIR`: Working directory for benchmark execution
-- `DISAGG_BATCH_SIZE`: Default batch size for job submission (default: 5)
 - `DEBUG_MODE`: Enable debug mode (set to "1" to skip job submission)
 - `DEBUG_JOB_ID`: Job ID to use in debug mode

@@ -348,11 +212,10 @@ The framework consists of:

 1. **ConfigLoader**: Scans and loads YAML configurations
 2. **ConfigValidator**: Validates configuration correctness
-3. **BatchManager**: Manages batch job submission for parallel execution
-4. **JobManager**: Handles SLURM job submission and monitoring
-5. **LogParser**: Extracts metrics from benchmark logs
-6. **TestCaseTracker**: Tracks test execution timing
-7. **ResultSaver**: Saves results to CSV
+3. **JobManager**: Handles SLURM job submission and monitoring
+4. **LogParser**: Extracts metrics from benchmark logs
+5. **TestCaseTracker**: Tracks test execution timing
+6. **ResultSaver**: Saves results to CSV

 ## Benefits
diff --git a/tests/integration/defs/perf/disagg/conftest.py b/tests/integration/defs/perf/disagg/conftest.py
index a4b88542dfd..2dabeda1cd9 100644
--- a/tests/integration/defs/perf/disagg/conftest.py
+++ b/tests/integration/defs/perf/disagg/conftest.py
@@ -1,11 +1,9 @@
 """Pytest configuration for disagg tests.

 Only collects tests in this directory when --disagg parameter is provided.
-Provides batch job submission capability to improve parallelism.
+Shares options such as --disagg-test-list defined in this conftest.py.
 """

-import os
-
 import pytest
 from utils.logger import logger

@@ -25,15 +23,6 @@ def pytest_addoption(parser):
         help="Path to a file containing test IDs (one per line) to run. "
         "Example: pytest --disagg --disagg-test-list=testlist/testlist_gb200.txt",
     )
-    parser.addoption(
-        "--disagg-batch-size",
-        action="store",
-        type=int,
-        default=None,
-        help="Number of jobs to submit per batch. Default: from env DISAGG_BATCH_SIZE or 5. "
-        "Set to 0 for unlimited (submit all at once). "
-        "Example: pytest --disagg --disagg-batch-size=10",
-    )


 def pytest_collect_directory(path, parent):
@@ -56,6 +45,7 @@ def pytest_collect_directory(path, parent):
         return True

     # With --disagg parameter, proceed with normal collection
+    # Options such as --disagg-test-list from the main conftest.py can then be used for filtering
     return None


@@ -98,7 +88,7 @@ def pytest_collection_modifyitems(config, items):

     for item in items:
         # item.nodeid is the full test identifier like:
-        # "test_disagg.py::TestDisaggBenchmark::test_benchmark[deepseek-r1-fp4:1k1k:...]"
+        # "test_disagg_simple.py::TestDisaggBenchmark::test_benchmark[deepseek-r1-fp4:1k1k:...]"
         if item.nodeid in wanted_tests:
             selected.append(item)
         else:
@@ -122,180 +112,3 @@ def pytest_collection_modifyitems(config, items):
         logger.warning(f"Please check that the test IDs in {test_list_file} are correct.")
     logger.info(f"{'=' * 70}\n")
-
-
-class BatchManager:
-    """Batch job submission manager for disagg tests.
-
-    Automatically splits test cases into batches and submits them on-demand
-    to maximize parallelism in SLURM cluster environments.
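    A hypothetical usage sketch (mirroring the session-scoped fixture below):

        manager = BatchManager(batch_size=5)
        for cfg in collected_configs:
            manager.add_config(cfg)
        job_id = manager.get_job_id(some_config)  # lazily submits that config's batch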
- - Key features: - - Lazy batch submission: only submits when needed - - Configurable batch size via CLI or environment variable - - Maintains job_id mapping for all submitted jobs - """ - - def __init__(self, batch_size=5): - """Initialize batch manager. - - Args: - batch_size: Number of jobs per batch. None or 0 means unlimited (submit all at once). - Default is 5 if not specified. - """ - # Normalize batch_size: None, 0, or negative means unlimited - if batch_size is None or batch_size <= 0: - self.batch_size = None - else: - self.batch_size = batch_size - - self.submitted_batches = set() # Track which batch numbers have been submitted - self.job_mapping = {} # Map test_id -> SLURM job_id - self.all_configs = [] # Ordered list of all test configs - - logger.info(f"\n{'=' * 70}") - logger.info("Batch Manager Initialized") - if self.batch_size: - logger.info(f"Batch size: {self.batch_size} jobs per batch") - else: - logger.info("Batch size: unlimited (submit all at once)") - logger.info(f"{'=' * 70}\n") - - def add_config(self, test_config): - """Add a test configuration to the manager. - - Called during initialization to build the ordered list of configs. - - Args: - test_config: TestConfig object to add - """ - self.all_configs.append(test_config) - - def get_job_id(self, test_config): - """Get SLURM job ID for a test config, submitting batch if needed. - - This is the main entry point. It: - 1. Determines which batch the test belongs to - 2. Submits the entire batch if not already submitted - 3. Returns the job_id for this specific test - - Args: - test_config: TestConfig object to get job_id for - - Returns: - str: SLURM job ID, or None if submission failed - """ - # Find the index of this config in the ordered list - try: - idx = next( - i for i, c in enumerate(self.all_configs) if c.test_id == test_config.test_id - ) - except StopIteration: - logger.error(f"Config not found in manager: {test_config.test_id}") - return None - - # Calculate which batch this test belongs to - if self.batch_size: - batch_num = idx // self.batch_size - else: - batch_num = 0 # All tests in one batch - - # Submit the batch if not already submitted - if batch_num not in self.submitted_batches: - self._submit_batch(batch_num) - - # Return the cached job_id - return self.job_mapping.get(test_config.test_id) - - def _submit_batch(self, batch_num): - """Submit all jobs in a specific batch. - - Args: - batch_num: Batch number to submit (0-indexed) - """ - from execution.executor import JobManager - - # Calculate batch range - if self.batch_size: - start_idx = batch_num * self.batch_size - end_idx = min(start_idx + self.batch_size, len(self.all_configs)) - else: - start_idx = 0 - end_idx = len(self.all_configs) - - batch_configs = self.all_configs[start_idx:end_idx] - - logger.info(f"\n{'=' * 70}") - logger.info(f"Submitting Batch {batch_num}") - logger.info(f"Range: [{start_idx}:{end_idx}] ({len(batch_configs)} jobs)") - logger.info(f"{'=' * 70}\n") - - # Submit all jobs in this batch - success_count = 0 - for i, config in enumerate(batch_configs, 1): - try: - success, job_id = JobManager.submit_test_job(config) - if success and job_id: - self.job_mapping[config.test_id] = job_id - success_count += 1 - # Truncate test_id for display - display_id = ( - config.test_id[:60] + "..." 
if len(config.test_id) > 60 else config.test_id - ) - logger.success(f" [{i:3d}/{len(batch_configs)}] Job {job_id} <- {display_id}") - else: - self.job_mapping[config.test_id] = None - logger.error(f" [{i:3d}/{len(batch_configs)}] Failed: {config.test_id[:50]}") - except Exception as e: - self.job_mapping[config.test_id] = None - logger.error(f" [{i:3d}/{len(batch_configs)}] Error: {e}") - - # Mark batch as submitted - self.submitted_batches.add(batch_num) - - logger.info(f"\n{'=' * 70}") - logger.success( - f"Batch {batch_num} Complete: {success_count}/{len(batch_configs)} succeeded" - ) - logger.info(f"{'=' * 70}\n") - - -@pytest.fixture(scope="session") -def batch_manager(request): - """Provide batch manager fixture for test methods. - - This session-scoped fixture creates and initializes the BatchManager - with all collected test configs. - - Returns: - BatchManager: Initialized batch manager instance - """ - # Get batch size from CLI option or environment variable - batch_size = request.config.getoption("--disagg-batch-size") - if batch_size is None: - env_batch_size = os.getenv("DISAGG_BATCH_SIZE") - if env_batch_size: - try: - batch_size = int(env_batch_size) - except ValueError: - logger.warning(f"Invalid DISAGG_BATCH_SIZE: {env_batch_size}, using default 5") - batch_size = 5 - else: - batch_size = 5 # Default batch size - - # Create batch manager - manager = BatchManager(batch_size=batch_size) - - # Extract all test configs from collected items - for item in request.session.items: - if hasattr(item, "callspec") and "test_config" in item.callspec.params: - manager.add_config(item.callspec.params["test_config"]) - - # Log statistics - logger.info(f"Total test configs: {len(manager.all_configs)}") - if manager.batch_size: - total_batches = (len(manager.all_configs) + manager.batch_size - 1) // manager.batch_size - logger.info(f"Total batches: {total_batches}") - logger.info("") - - return manager diff --git a/tests/integration/defs/perf/disagg/execution/executor.py b/tests/integration/defs/perf/disagg/execution/executor.py index 547b63aa8c4..d454765c536 100644 --- a/tests/integration/defs/perf/disagg/execution/executor.py +++ b/tests/integration/defs/perf/disagg/execution/executor.py @@ -114,9 +114,7 @@ def submit_shell_job( logger.debug(f"Script: {script_path}") logger.debug(f"Log file: {output_log_file}") - # Use check=False to allow submission even with Kerberos warnings - # (mimics submit.py behavior) - output = exec_cmd_with_output(sbatch_args, timeout=60, check=False) + output = exec_cmd_with_output(sbatch_args, timeout=60) job_id = output.strip() # Parse job ID (--parsable returns just the job ID) diff --git a/tests/integration/defs/perf/disagg/execution/subprocess_utils.py b/tests/integration/defs/perf/disagg/execution/subprocess_utils.py index 39a3f0ac4b9..9ab77714267 100644 --- a/tests/integration/defs/perf/disagg/execution/subprocess_utils.py +++ b/tests/integration/defs/perf/disagg/execution/subprocess_utils.py @@ -33,22 +33,19 @@ def exec_cmd(*popenargs, timeout: Optional[float] = None, **kwargs) -> int: return result.returncode -def exec_cmd_with_output( - *popenargs, timeout: Optional[float] = None, check: bool = True, **kwargs -) -> str: +def exec_cmd_with_output(*popenargs, timeout: Optional[float] = None, **kwargs) -> str: """Execute command and return output as string. 
Args: *popenargs: Command and arguments timeout: Timeout in seconds - check: If True, raise CalledProcessError on non-zero exit code (default: True) **kwargs: Additional subprocess arguments Returns: stdout as string (decoded from bytes) Raises: - subprocess.CalledProcessError: If check=True and command returns non-zero exit code + subprocess.CalledProcessError: If command returns non-zero exit code subprocess.TimeoutExpired: If timeout is reached """ result = subprocess.run( @@ -56,15 +53,11 @@ def exec_cmd_with_output( stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=timeout, - check=check, + check=True, **kwargs, ) - # Log stderr if it exists (as warning if check=False, as error if check=True) + # Log stderr if it exists if result.stderr: - stderr_output = result.stderr.decode().strip() - if stderr_output: - if check: - logger.error(f"Command stderr: {stderr_output}") - else: - logger.warning(f"Command stderr: {stderr_output}") + stderr_output = result.stderr.decode() + logger.error(f"Command stderr: {stderr_output}") return result.stdout.decode() diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL.yaml index 3c33b288e56..705e683aabf 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL.yaml @@ -22,7 +22,7 @@ benchmark: multi_round: 8 benchmark_ratio: 0.8 streaming: true - concurrency_list: 512 1024 + concurrency_list: 512 1075 input_length: 1024 output_length: 1024 dataset_file: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX.yaml index 0a6135f34a2..53dcaef3df9 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX.yaml @@ -22,7 +22,7 @@ benchmark: multi_round: 8 benchmark_ratio: 0.8 streaming: true - concurrency_list: 512 1024 + concurrency_list: 512 1075 input_length: 1024 output_length: 1024 dataset_file: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL.yaml index 3c0b8d2e7a7..29251630c99 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL.yaml @@ -22,7 +22,7 @@ benchmark: multi_round: 8 benchmark_ratio: 0.8 streaming: true - concurrency_list: '2048' + concurrency_list: '2150' input_length: 1024 output_length: 1024 dataset_file: diff --git 
a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-UCX.yaml index 2dd7fd80b2d..409e2284c1f 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-UCX.yaml @@ -22,7 +22,7 @@ benchmark: multi_round: 8 benchmark_ratio: 0.8 streaming: true - concurrency_list: '2048' + concurrency_list: '2150' input_length: 1024 output_length: 1024 dataset_file: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml index fedb8825b21..ce4527e18b8 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml @@ -22,7 +22,7 @@ benchmark: multi_round: 8 benchmark_ratio: 0.8 streaming: true - concurrency_list: '1024' + concurrency_list: '1075' input_length: 1024 output_length: 1024 dataset_file: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml index 57664549800..d1d2792ed46 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml @@ -22,7 +22,7 @@ benchmark: multi_round: 8 benchmark_ratio: 0.8 streaming: true - concurrency_list: '1024' + concurrency_list: '1075' input_length: 1024 output_length: 1024 dataset_file: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml index 4d4f8cb7db2..f2f39b906f8 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml @@ -22,7 +22,7 @@ benchmark: multi_round: 8 benchmark_ratio: 0.8 streaming: true - concurrency_list: '2048' + concurrency_list: '2150' input_length: 1024 output_length: 1024 dataset_file: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml index fc124229436..58df066b581 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml +++ 
b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml @@ -22,7 +22,7 @@ benchmark: multi_round: 8 benchmark_ratio: 0.8 streaming: true - concurrency_list: '2048' + concurrency_list: '2150' input_length: 1024 output_length: 1024 dataset_file: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml index 60b022a21d0..bdb222ece47 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml @@ -22,7 +22,7 @@ benchmark: multi_round: 8 benchmark_ratio: 0.8 streaming: true - concurrency_list: '1024' + concurrency_list: '1075' input_length: 8192 output_length: 1024 dataset_file: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml index f4cfcda4e67..e1330be1ed0 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml @@ -22,7 +22,7 @@ benchmark: multi_round: 8 benchmark_ratio: 0.8 streaming: true - concurrency_list: '1024' + concurrency_list: '1075' input_length: 8192 output_length: 1024 dataset_file: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml index dafc6a7df79..2fc065b480b 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml @@ -22,7 +22,7 @@ benchmark: multi_round: 8 benchmark_ratio: 0.8 streaming: true - concurrency_list: '512' + concurrency_list: '538' input_length: 8192 output_length: 1024 dataset_file: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml index bb54d661a52..b4efd203978 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml @@ -22,7 +22,7 @@ benchmark: multi_round: 8 benchmark_ratio: 0.8 streaming: true - concurrency_list: '512' + concurrency_list: '538' input_length: 8192 output_length: 1024 dataset_file: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml 
b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml index 4dd4d7fb462..451a995e303 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml @@ -23,7 +23,7 @@ benchmark: multi_round: 8 benchmark_ratio: 0.8 streaming: true - concurrency_list: '1024' + concurrency_list: '1075' input_length: 1024 output_length: 1024 dataset_file: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml index ca80042c69e..f67ff56f88a 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml @@ -23,7 +23,7 @@ benchmark: multi_round: 8 benchmark_ratio: 0.8 streaming: true - concurrency_list: '2048' + concurrency_list: '2150' input_length: 1024 output_length: 1024 dataset_file: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml index c262e3f6610..4d3a716c675 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml @@ -23,7 +23,7 @@ benchmark: multi_round: 8 benchmark_ratio: 0.8 streaming: true - concurrency_list: '1024' + concurrency_list: '1075' input_length: 8192 output_length: 1024 dataset_file: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml index 0b7bc63e3f0..441aebf189c 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml @@ -23,7 +23,7 @@ benchmark: multi_round: 8 benchmark_ratio: 0.8 streaming: true - concurrency_list: '512' + concurrency_list: '538' input_length: 8192 output_length: 1024 dataset_file: diff --git a/tests/integration/defs/perf/disagg/test_disagg.py b/tests/integration/defs/perf/disagg/test_disagg.py index b60ba851967..39008ca11a1 100644 --- a/tests/integration/defs/perf/disagg/test_disagg.py +++ b/tests/integration/defs/perf/disagg/test_disagg.py @@ -62,7 +62,7 @@ class TestDisaggBenchmark: @pytest.mark.perf @pytest.mark.parametrize("test_config", PERF_TEST_CASES) - def test_benchmark(self, request, batch_manager, test_config: TestConfig): + def test_benchmark(self, request, test_config: TestConfig): """Performance benchmark test for YAML configurations.""" full_test_name = 
request.node.name @@ -101,14 +101,15 @@ def test_benchmark(self, request, batch_manager, test_config: TestConfig): ) job_id = EnvManager.get_debug_job_id() else: - # Get job_id from batch manager (auto-submits batch if needed) - job_id = batch_manager.get_job_id(test_config) + # Submit job using JobManager + success, job_id = JobManager.submit_test_job(test_config) # Validate submission result - assert job_id, f"Failed to get job_id for {test_config.test_id}" + assert success, f"Job submission failed: {test_config.test_id}" + assert job_id, "Unable to get job ID" - # Wait for completion (timeout: 10 hours = 36000 seconds) - JobManager.wait_for_completion(job_id, 36000, test_config, check_early_failure=True) + # Wait for completion (timeout/early failure handled inside) + JobManager.wait_for_completion(job_id, 7200, test_config, check_early_failure=True) # End tracking test case test_tracker.end_test_case() @@ -135,7 +136,7 @@ def test_benchmark(self, request, batch_manager, test_config: TestConfig): @pytest.mark.accuracy @pytest.mark.parametrize("test_config", ACCURACY_TEST_CASES) - def test_accuracy(self, request, batch_manager, test_config: TestConfig): + def test_accuracy(self, request, test_config: TestConfig): """Accuracy test for YAML configurations.""" full_test_name = request.node.name @@ -178,14 +179,15 @@ def test_accuracy(self, request, batch_manager, test_config: TestConfig): ) job_id = EnvManager.get_debug_job_id() else: - # Get job_id from batch manager (auto-submits batch if needed) - job_id = batch_manager.get_job_id(test_config) + # Submit job using JobManager + success, job_id = JobManager.submit_test_job(test_config) # Validate submission result - assert job_id, f"Failed to get job_id for {test_config.test_id}" + assert success, f"Job submission failed: {test_config.test_id}" + assert job_id, "Unable to get job ID" - # Wait for completion (timeout: 10 hours = 36000 seconds) - JobManager.wait_for_completion(job_id, 36000, test_config, check_early_failure=True) + # Wait for completion (timeout/early failure handled inside) + JobManager.wait_for_completion(job_id, 10800, test_config, check_early_failure=True) # End tracking test case test_tracker.end_test_case() @@ -214,7 +216,7 @@ def test_accuracy(self, request, batch_manager, test_config: TestConfig): @pytest.mark.stress @pytest.mark.parametrize("test_config", STRESS_TEST_CASES) - def test_stress(self, request, batch_manager, test_config: TestConfig): + def test_stress(self, request, test_config: TestConfig): """Stress test combining performance benchmarks and accuracy validation. 
This test type is designed for stress testing scenarios where both
@@ -263,14 +265,15 @@ def test_stress(self, request, batch_manager, test_config: TestConfig):
                 )
                 job_id = EnvManager.get_debug_job_id()
             else:
-                # Get job_id from batch manager (auto-submits batch if needed)
-                job_id = batch_manager.get_job_id(test_config)
+                # Submit job using JobManager
+                success, job_id = JobManager.submit_test_job(test_config)

                 # Validate submission result
-                assert job_id, f"Failed to get job_id for {test_config.test_id}"
+                assert success, f"Job submission failed: {test_config.test_id}"
+                assert job_id, "Unable to get job ID"

-            # Wait for completion (timeout: 10 hours = 36000 seconds)
-            JobManager.wait_for_completion(job_id, 36000, test_config, check_early_failure=True)
+            # Wait for completion (timeout for stress tests: 3 hours = 10800 seconds)
+            JobManager.wait_for_completion(job_id, 10800, test_config, check_early_failure=True)

             # End tracking test case
             test_tracker.end_test_case()
diff --git a/tests/integration/defs/test_e2e.py b/tests/integration/defs/test_e2e.py
index b55eeb8359d..a4b060eb712 100644
--- a/tests/integration/defs/test_e2e.py
+++ b/tests/integration/defs/test_e2e.py
@@ -1902,11 +1902,48 @@ def test_ptp_quickstart(llm_root, llm_venv):
         marks=skip_pre_blackwell),
     pytest.param(
         'GPT-OSS-120B', 'gpt_oss/gpt-oss-120b', marks=skip_pre_blackwell),
+    ("Llama3.1-8B-bf16-instruct", "llama-3.1-model/Llama-3.1-8B-Instruct"),
+    pytest.param('Llama3.1-8B-FP4',
+                 'modelopt-hf-model-hub/Llama-3.1-8B-Instruct-fp4',
+                 marks=skip_pre_blackwell),
+    pytest.param(
+        'Qwen3-8b-fp8', 'Qwen3/nvidia-Qwen3-8B-FP8', marks=skip_pre_blackwell),
+    pytest.param('Qwen3-8b-nvfp4',
+                 'Qwen3/nvidia-Qwen3-8B-NVFP4',
+                 marks=skip_pre_blackwell),
+    ("Qwen3-8B-bf16", "Qwen3/Qwen3-8B"),
+    pytest.param('Qwen3-14b-fp8',
+                 'Qwen3/nvidia-Qwen3-14B-FP8',
+                 marks=skip_pre_blackwell),
+    pytest.param('Qwen3-14b-nvfp4',
+                 'Qwen3/nvidia-Qwen3-14B-NVFP4',
+                 marks=skip_pre_blackwell),
+    ("Qwen3-14B-bf16", "Qwen3/Qwen3-14B"),
+    pytest.param('Qwen3-32b-nvfp4',
+                 'Qwen3/nvidia-Qwen3-32B-NVFP4',
+                 marks=skip_pre_blackwell),
+    ("Qwen3-32B-bf16", "Qwen3/Qwen3-32B"),
+    pytest.param('Phi4-Reasoning-Plus-fp8',
+                 'nvidia-Phi-4-reasoning-plus-FP8',
+                 marks=skip_pre_blackwell),
+    pytest.param('Phi4-Reasoning-Plus-nvfp4',
+                 'nvidia-Phi-4-reasoning-plus-NVFP4',
+                 marks=skip_pre_blackwell),
+    ("Phi-4-reasoning-plus-bf16", "Phi-4-reasoning-plus"),
+    pytest.param('Nemotron-Super-49B-v1.5-FP8',
+                 'nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1_5-FP8',
+                 marks=skip_pre_blackwell),
+    pytest.param('Llama-4-Scout-17B-16E-FP4',
+                 'llama4-models/Llama-4-Scout-17B-16E-Instruct-FP4',
+                 marks=skip_pre_blackwell),
+    pytest.param('Nemotron-Nano-v2-nvfp4',
+                 'NVIDIA-Nemotron-Nano-9B-v2-NVFP4',
+                 marks=skip_pre_blackwell),
 ])
 def test_ptp_quickstart_advanced(llm_root, llm_venv, model_name, model_path):
     print(f"Testing {model_name}.")
     example_root = Path(os.path.join(llm_root, "examples", "llm-api"))
-    if model_name == "Nemotron-H-8B":
+    if model_name in ("Nemotron-H-8B", "Nemotron-Nano-v2-nvfp4"):
         llm_venv.run_cmd([
             str(example_root / "quickstart_advanced.py"),
             "--disable_kv_cache_reuse",
@@ -1934,7 +1971,7 @@ def test_ptp_quickstart_advanced(llm_root, llm_venv, model_name, model_path):
         ]
         if "Qwen3" in model_name:
             cmds.append(f"--kv_cache_fraction=0.6")
-        if "Llama3.1-70B" in model_name:
+        if "Llama3.1-70B" in model_name or "Llama3.3-70B" in model_name:
             cmds.append(f"--max_num_tokens=1024")
         llm_venv.run_cmd(cmds, stdout=running_log)
         if model_name in mapping:
@@ -2053,11 +2090,22 @@
def test_ptp_quickstart_advanced_deepseek_multi_nodes(llm_root, llm_venv, @pytest.mark.parametrize("model_name,model_path,eagle_model_path", [ ("Llama-3.1-8b-Instruct", "llama-3.1-model/Llama-3.1-8B-Instruct", "EAGLE3-LLaMA3.1-Instruct-8B"), + pytest.param('GPT-OSS-120B-Eagle3', + 'gpt_oss/gpt-oss-120b', + 'gpt_oss/gpt-oss-120b-Eagle3', + marks=skip_pre_blackwell), ]) def test_ptp_quickstart_advanced_eagle3(llm_root, llm_venv, model_name, model_path, eagle_model_path): print(f"Testing {model_name}.") example_root = Path(os.path.join(llm_root, "examples", "llm-api")) + + # Set expected memory based on model size + if "GPT-OSS-120B" in model_name: + expected_mem = [106.71, 0, 0, 0] # Memory for 120B model with Eagle3 + else: + expected_mem = [25.2, 0, 0, 0] # Memory for Llama-3.1-8B with Eagle3 + with tempfile.NamedTemporaryFile(mode='w+t', suffix=f".{model_name}.log", dir="./", @@ -2077,7 +2125,7 @@ def test_ptp_quickstart_advanced_eagle3(llm_root, llm_venv, model_name, "--disable_overlap_scheduler", ], stdout=running_log) - _check_mem_usage(running_log, [25.2, 0, 0, 0]) + _check_mem_usage(running_log, expected_mem) @pytest.mark.parametrize("model_name,model_path,eagle_model_path", [ diff --git a/tests/integration/defs/test_unittests.py b/tests/integration/defs/test_unittests.py index 707426f8227..d2cd80692cf 100644 --- a/tests/integration/defs/test_unittests.py +++ b/tests/integration/defs/test_unittests.py @@ -126,8 +126,8 @@ def test_unittests_v2(llm_root, llm_venv, case: str, output_dir, request): f'results-sub-unittests-{case_fn}.xml') command = [ - '-m', 'pytest', ignore_opt, "-vv", "--tb=short", "-rF", - "--timeout=2400", "--timeout-method=thread" + '-m', 'pytest', ignore_opt, "-v", "--tb=short", "-rF", "--timeout=2400", + "--timeout-method=thread" ] if test_prefix: command += [f"--test-prefix={test_prefix}"] diff --git a/tests/integration/test_lists/qa/llm_digits_core.txt b/tests/integration/test_lists/qa/llm_digits_core.txt new file mode 100644 index 00000000000..99ca6f878e3 --- /dev/null +++ b/tests/integration/test_lists/qa/llm_digits_core.txt @@ -0,0 +1,40 @@ +test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-FP8-llama-3.1-model/Llama-3.1-8B-Instruct-FP8] +test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-FP4-modelopt-hf-model-hub/Llama-3.1-8B-Instruct-fp4] +test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-bf16-instruct-llama-3.1-model/Llama-3.1-8B-Instruct] +test_e2e.py::test_ptp_quickstart_advanced[GPT-OSS-20B-gpt_oss/gpt-oss-20b] +test_e2e.py::test_ptp_quickstart_advanced[GPT-OSS-120B-gpt_oss/gpt-oss-120b] +test_e2e.py::test_ptp_quickstart_advanced[Qwen3-8b-fp8-Qwen3/nvidia-Qwen3-8B-FP8] +test_e2e.py::test_ptp_quickstart_advanced[Qwen3-8b-nvfp4-Qwen3/nvidia-Qwen3-8B-NVFP4] +test_e2e.py::test_ptp_quickstart_advanced[Qwen3-8B-bf16-Qwen3/Qwen3-8B] +test_e2e.py::test_ptp_quickstart_advanced[Qwen3-14b-fp8-Qwen3/nvidia-Qwen3-14B-FP8] +test_e2e.py::test_ptp_quickstart_advanced[Qwen3-14b-nvfp4-Qwen3/nvidia-Qwen3-14B-NVFP4] +test_e2e.py::test_ptp_quickstart_advanced[Qwen3-14B-bf16-Qwen3/Qwen3-14B] +test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-image] +test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-audio] +test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-image_audio] 
+test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-fp4-multimodals/Phi-4-multimodal-instruct-FP4-image] +test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-fp4-multimodals/Phi-4-multimodal-instruct-FP4-audio] +test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-fp4-multimodals/Phi-4-multimodal-instruct-FP4-image_audio] +test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-fp8-multimodals/Phi-4-multimodal-instruct-FP8-image] +test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-fp8-multimodals/Phi-4-multimodal-instruct-FP8-audio] +test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-fp8-multimodals/Phi-4-multimodal-instruct-FP8-image_audio] +test_e2e.py::test_ptp_quickstart_advanced[Phi4-Reasoning-Plus-fp8-nvidia-Phi-4-reasoning-plus-FP8] +test_e2e.py::test_ptp_quickstart_advanced[Phi4-Reasoning-Plus-nvfp4-nvidia-Phi-4-reasoning-plus-NVFP4] +test_e2e.py::test_ptp_quickstart_advanced[Phi-4-reasoning-plus-bf16-Phi-4-reasoning-plus] +test_e2e.py::test_ptp_quickstart_advanced[Qwen3-32B-bf16-Qwen3/Qwen3-32B] +test_e2e.py::test_ptp_quickstart_advanced[Qwen3-32b-nvfp4-Qwen3/nvidia-Qwen3-32B-NVFP4] +test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Nano-v2-nvfp4-NVIDIA-Nemotron-Nano-9B-v2-NVFP4] +test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Super-49B-v1.5-FP8-nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1_5-FP8] +test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B-Qwen3/Qwen3-30B-A3B] +test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B_nvfp4_hf-Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf] +test_e2e.py::test_ptp_quickstart_advanced[Llama3.3-70B-FP8-modelopt-hf-model-hub/Llama-3.3-70B-Instruct-fp8] +test_e2e.py::test_ptp_quickstart_advanced[Llama3.3-70B-FP4-modelopt-hf-model-hub/Llama-3.3-70B-Instruct-fp4] + +accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_auto_dtype +accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4 +accuracy/test_llm_api_pytorch_multimodal.py::TestQwen2_5_VL_7B::test_auto_dtype +accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_cutlass-torch_compile=False] +accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_cutlass-torch_compile=True] +accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_fp8 +accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_fp4 +accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype diff --git a/tests/integration/test_lists/qa/llm_digits_func.txt b/tests/integration/test_lists/qa/llm_digits_func.txt index 30e3f223846..aba46316e26 100644 --- a/tests/integration/test_lists/qa/llm_digits_func.txt +++ b/tests/integration/test_lists/qa/llm_digits_func.txt @@ -1,25 +1,44 @@ -test_e2e.py::test_ptp_quickstart -test_e2e.py::test_ptp_quickstart_advanced_mixed_precision #Llama-3_1-8B-Instruct_fp8_nvfp4_hf -test_e2e.py::test_ptp_star_attention_example[Llama3.1-8B-BF16-llama-3.1-model/Meta-Llama-3.1-8B] -test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-BF16-llama-3.1-model/Meta-Llama-3.1-8B] -test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-NVFP4-nvfp4-quantized/Meta-Llama-3.1-8B] +test_e2e.py::test_ptp_quickstart_advanced[GPT-OSS-20B-gpt_oss/gpt-oss-20b] +test_e2e.py::test_ptp_quickstart_advanced[GPT-OSS-120B-gpt_oss/gpt-oss-120b] +test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-bf16-instruct-llama-3.1-model/Llama-3.1-8B-Instruct] test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-FP8-llama-3.1-model/Llama-3.1-8B-Instruct-FP8] 
-test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Super-49B-v1-FP8-nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1-FP8] -test_e2e.py::test_ptp_quickstart_advanced[Mixtral-8x7B-NVFP4-nvfp4-quantized/Mixtral-8x7B-Instruct-v0.1] -test_e2e.py::test_ptp_quickstart_advanced[Mixtral-8x7B-FP8-Mixtral-8x7B-Instruct-v0.1-fp8] -test_e2e.py::test_ptp_quickstart_advanced_eagle3[Llama-3.1-8b-Instruct-llama-3.1-model/Llama-3.1-8B-Instruct-EAGLE3-LLaMA3.1-Instruct-8B] +test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-FP4-modelopt-hf-model-hub/Llama-3.1-8B-Instruct-fp4] +test_e2e.py::test_ptp_quickstart_advanced[Qwen3-8b-fp8-Qwen3/nvidia-Qwen3-8B-FP8] +test_e2e.py::test_ptp_quickstart_advanced[Qwen3-8b-nvfp4-Qwen3/nvidia-Qwen3-8B-NVFP4] +test_e2e.py::test_ptp_quickstart_advanced[Qwen3-8B-bf16-Qwen3/Qwen3-8B] +test_e2e.py::test_ptp_quickstart_advanced[Qwen3-14b-fp8-Qwen3/nvidia-Qwen3-14B-FP8] +test_e2e.py::test_ptp_quickstart_advanced[Qwen3-14b-nvfp4-Qwen3/nvidia-Qwen3-14B-NVFP4] +test_e2e.py::test_ptp_quickstart_advanced[Qwen3-14B-bf16-Qwen3/Qwen3-14B] +test_e2e.py::test_ptp_quickstart_advanced[Qwen3-32B-bf16-Qwen3/Qwen3-32B] +test_e2e.py::test_ptp_quickstart_advanced[Qwen3-32b-nvfp4-Qwen3/nvidia-Qwen3-32B-NVFP4] +test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B-Qwen3/Qwen3-30B-A3B] +test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B_nvfp4_hf-Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf] +test_e2e.py::test_ptp_quickstart_advanced[Phi4-Reasoning-Plus-fp8-nvidia-Phi-4-reasoning-plus-FP8] +test_e2e.py::test_ptp_quickstart_advanced[Phi4-Reasoning-Plus-nvfp4-nvidia-Phi-4-reasoning-plus-NVFP4] +test_e2e.py::test_ptp_quickstart_advanced[Phi-4-reasoning-plus-bf16-Phi-4-reasoning-plus] test_e2e.py::test_ptp_quickstart_advanced[Llama3.3-70B-FP8-modelopt-hf-model-hub/Llama-3.3-70B-Instruct-fp8] test_e2e.py::test_ptp_quickstart_advanced[Llama3.3-70B-FP4-modelopt-hf-model-hub/Llama-3.3-70B-Instruct-fp4] -test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Super-49B-v1-BF16-nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1] -test_e2e.py::test_ptp_quickstart_advanced[Mixtral-8x7B-BF16-Mixtral-8x7B-Instruct-v0.1] -test_e2e.py::test_ptp_quickstart_advanced[Mistral-Nemo-12b-Base-Mistral-Nemo-Base-2407] +test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-70B-FP8-llama-3.1-model/Llama-3.1-70B-Instruct-FP8] +test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Super-49B-v1.5-FP8-nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1_5-FP8] +test_e2e.py::test_ptp_quickstart_advanced[Llama-4-Scout-17B-16E-FP4-llama4-models/Llama-4-Scout-17B-16E-Instruct-FP4] test_e2e.py::test_ptp_quickstart_advanced[DeepSeek-R1-Distill-Qwen-32B-DeepSeek-R1/DeepSeek-R1-Distill-Qwen-32B] +test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Nano-v2-nvfp4-NVIDIA-Nemotron-Nano-9B-v2-NVFP4] +test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-image] +test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-audio] +test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-image_audio] +test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-fp4-multimodals/Phi-4-multimodal-instruct-FP4-image] +test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-fp4-multimodals/Phi-4-multimodal-instruct-FP4-audio] +test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-fp4-multimodals/Phi-4-multimodal-instruct-FP4-image_audio] 
+test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-fp8-multimodals/Phi-4-multimodal-instruct-FP8-image] +test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-fp8-multimodals/Phi-4-multimodal-instruct-FP8-audio] +test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-fp8-multimodals/Phi-4-multimodal-instruct-FP8-image_audio] +test_e2e.py::test_ptp_quickstart_advanced_eagle3[GPT-OSS-120B-Eagle3-gpt_oss/gpt-oss-120b-gpt_oss/gpt-oss-120b-Eagle3] -accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_llm_sampler -accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_eagle3[sampler_async_worker=False-eagle3_one_model=False-overlap_scheduler=False] -accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_eagle3[sampler_async_worker=False-eagle3_one_model=True-overlap_scheduler=True] -accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_eagle3[sampler_async_worker=True-eagle3_one_model=True-overlap_scheduler=True] -accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_ngram - -accuracy/test_llm_api_pytorch_multimodal.py::TestNVILA_8B::test_auto_dtype -accuracy/test_llm_api_pytorch_multimodal.py::TestVILA1_5_3B::test_auto_dtype +accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_auto_dtype +accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4 +accuracy/test_llm_api_pytorch_multimodal.py::TestQwen2_5_VL_7B::test_auto_dtype +accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_cutlass-torch_compile=False] +accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_cutlass-torch_compile=True] +accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_fp8 +accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_fp4 +accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype diff --git a/tests/integration/test_lists/test-db/l0_gb200_multi_gpus.yml b/tests/integration/test_lists/test-db/l0_gb200_multi_gpus.yml index 62c0af24f8b..2241aea415a 100644 --- a/tests/integration/test_lists/test-db/l0_gb200_multi_gpus.yml +++ b/tests/integration/test_lists/test-db/l0_gb200_multi_gpus.yml @@ -72,7 +72,7 @@ l0_gb200_multi_gpus: - accuracy/test_llm_api_pytorch.py::TestQwen3NextInstruct::test_nvfp4[tp4ep4-cutlass] - accuracy/test_llm_api_pytorch.py::TestQwen3NextInstruct::test_nvfp4[no_cuda_graph_overlap-cutlass] - accuracy/test_llm_api_pytorch.py::TestQwen3NextInstruct::test_nvfp4[tp4ep4-trtllm] - - accuracy/test_llm_api_pytorch_multimodal.py::TestMistralLarge3_675B::test_nvfp4_4gpus[latency_moe_trtllm] TIMEOUT (90) + - accuracy/test_llm_api_pytorch.py::TestMistralLarge3_675B::test_nvfp4_4gpus[latency_moe_trtllm_eagle] TIMEOUT (90) - condition: ranges: system_gpu_count: @@ -105,4 +105,4 @@ l0_gb200_multi_gpus: - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus_online_eplb[enable_configurable_moe-fp8] - accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4_4gpus[latency_moe_trtllm_eagle3] TIMEOUT (90) - accuracy/test_llm_api_pytorch.py::TestMistralLarge3_675B::test_nvfp4_4gpus[latency_moe_trtllm] TIMEOUT (90) - - accuracy/test_llm_api_pytorch.py::TestMistralLarge3_675B::test_nvfp4_4gpus[latency_moe_trtllm_eagle] TIMEOUT (90) + - accuracy/test_llm_api_pytorch_multimodal.py::TestMistralLarge3_675B::test_nvfp4_4gpus[latency_moe_trtllm] TIMEOUT (90) diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index ed85622c5e1..a71d0475c03 100644 --- a/tests/integration/test_lists/waives.txt +++ 
@@ -370,6 +370,7 @@ accuracy/test_llm_api_pytorch_multimodal.py::TestQwen2_5_VL_7B::test_auto_dtype
 accuracy/test_llm_api_pytorch_multimodal.py::TestLlava_V1_6_Mistral_7B::test_auto_dtype SKIP (https://nvbugs/5707087)
 accuracy/test_llm_api_pytorch_multimodal.py::TestPhi4MMFusedVisionLora::test_auto_dtype SKIP (https://nvbugs/5707087)
 disaggregated/test_disaggregated.py::test_disaggregated_ctxtp2pp2_gentp2pp2[TinyLlama-1.1B-Chat-v1.0] SKIP (https://nvbugs/5705199)
+unittest/_torch/modules/test_fused_moe.py::test_fused_moe_fp8_blockwise_wide_ep[NotEnabled] SKIP (https://nvbugs/5707392)
 accuracy/test_llm_api_pytorch.py::TestLlama3_3NemotronSuper49Bv1::test_auto_dtype_tp2 SKIP (https://nvbugs/5707145)
 accuracy/test_llm_api_pytorch.py::TestLlama3_3NemotronSuper49Bv1::test_fp8_prequantized_tp2 SKIP (https://nvbugs/5707145)
 accuracy/test_llm_api_pytorch.py::TestNemotronH_56B_Base::test_auto_dtype[tp8-cuda_graph=True] SKIP (https://nvbugs/5640697)
@@ -382,6 +383,8 @@ accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUT
 accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_eagle3_2gpus[cutlass-two_model-overlap_scheduler] SKIP (https://nvbugs/5702826)
 accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_eagle3_4gpus[cutlass-two_model-overlap_scheduler] SKIP (https://nvbugs/5702826)
 unittest/llmapi/test_llm_pytorch.py::test_llm_reward_model SKIP (https://nvbugs/5670458)
+accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_cutlass] SKIP (https://nvbugs/5740377)
+accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[dep4_latency_moe_trtllm-torch_compile=False] SKIP (https://nvbugs/5740377)
 accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_guided_decoding[llguidance-mtp_nextn=2] SKIP (https://nvbugs/5740075)
 accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_guided_decoding[xgrammar-mtp_nextn=2] SKIP (https://nvbugs/5740075)
 unittest/_torch/modeling/test_modeling_out_of_tree.py::TestOutOfTree::test_llm_api[False] SKIP (https://nvbugs/5739981)
@@ -393,12 +396,16 @@ accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_tr
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[tp4-fp8kv=True-attn_backend=FLASHINFER-torch_compile=True] SKIP (https://nvbugs/5741304)
 unittest/_torch/multi_gpu/test_allreduce.py::test_allreduce_fusion_patterns[2-residual_rms_norm_out_quant_fp8-hidden:7168-seqlen:8192] SKIP (https://nvbugs/5741392)
 unittest/executor/test_rpc.py::TestRpcCorrectness::test_incremental_task_async SKIP (https://nvbugs/5741476)
+accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[pp4-fp8kv=True-attn_backend=TRTLLM-torch_compile=False] SKIP (https://nvbugs/5740377)
+accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[dep4_latency_moe_cutlass-torch_compile=False] SKIP (https://nvbugs/5740377)
 examples/test_phi.py::test_phi_fp8_with_bf16_lora[phi-2] SKIP (https://nvbugs/5744293)
 examples/test_phi.py::test_llm_phi_1node_2gpus_summary[Phi-3.5-MoE-instruct-nb:1] SKIP (https://nvbugs/5744293)
 examples/test_phi.py::test_llm_phi_quantization_1gpu[phi-2-fp8-bfloat16] SKIP (https://nvbugs/5744293)
 test_e2e.py::test_trtllm_bench_llmapi_launch[pytorch_backend-llama-v3-llama3-8b] SKIP (https://nvbugs/5744432)
 test_e2e.py::test_trtllm_serve_multimodal_example SKIP (https://nvbugs/5747920)
 test_e2e.py::test_trtllm_serve_example SKIP (https://nvbugs/5747938)
+unittest/_torch/auto_deploy/unit/singlegpu/test_ad_build_small_single.py::test_build_ad[meta-llama/Llama-4-Scout-17B-16E-Instruct-llm_extra_args8] SKIP (https://nvbugs/5747878)
+unittest/_torch/auto_deploy/unit/singlegpu/test_ad_build_small_single.py::test_build_ad[meta-llama/Llama-4-Scout-17B-16E-Instruct-llm_extra_args9] SKIP (https://nvbugs/5747878)
 triton_server/test_triton.py::test_opt[opt] SKIP (https://nvbugs/5739981)
 unittest/llmapi/test_llm_pytorch.py::test_tinyllama_logits_processor[False] SKIP (https://nvbugs/5771838)
 unittest/llmapi/test_llm_pytorch.py::test_tinyllama_logits_processor[True] SKIP (https://nvbugs/5771838)
@@ -409,6 +416,8 @@ accuracy/test_cli_flow.py::TestPhi3Mini128kInstruct::test_auto_dtype SKIP (https
 accuracy/test_cli_flow.py::TestPhi3Small8kInstruct::test_auto_dtype SKIP (https://nvbugs/5744293)
 accuracy/test_cli_flow.py::TestPhi3Small128kInstruct::test_auto_dtype SKIP (https://nvbugs/5744293)
 accuracy/test_cli_flow.py::TestPhi3_5MiniInstruct::test_auto_dtype SKIP (https://nvbugs/5744293)
+unittest/_torch/auto_deploy/unit/singlegpu/models/test_llama4_vlm_patch.py::test_build_run_llama4_vlm SKIP (https://nvbugs/5747878)
+accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus_online_eplb[fp8kv=True-moe_backend=TRTLLM] SKIP (https://nvbugs/5740377)
 cpp/test_multi_gpu.py::TestDisagg::test_symmetric_executor[gpt-2proc-mpi_kvcache-90] SKIP (https://nvbugs/5755941)
 examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:1-pp:1-float16-BertForQuestionAnswering-bert/bert-base-cased-squad2] SKIP (https://nvbugs/5608979)
 examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:1-pp:1-float16-RobertaForQuestionAnswering-bert/roberta-base-squad2] SKIP (https://nvbugs/5608979)
@@ -441,9 +450,11 @@ test_e2e.py::test_ptp_quickstart_advanced_2gpus_sm120[Nemotron-Super-49B-v1-BF16
 unittest/_torch/multi_gpu/test_mnnvl_allreduce.py::test_row_linear_residual_norm_fusion[no_fusion-strategy:8-dtype:bfloat16-hidden:8192-seqlen:[15]] SKIP (https://nvbugs/5761364)
 triton_server/test_triton.py::test_gpt_speculative_decoding[gpt-speculative-decoding] SKIP (https://nvbugs/5762854)
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B_Instruct_RocketKV::test_auto_dtype SKIP (https://nvbugs/5762822)
+accuracy/test_llm_api_pytorch.py::TestMistralLarge3_675B::test_nvfp4_4gpus[latency_moe_trtllm] SKIP (https://nvbugs/5762852)
+accuracy/test_llm_api_pytorch.py::TestMistralLarge3_675B::test_nvfp4_4gpus[latency_moe_trtllm_eagle] SKIP (https://nvbugs/5762852)
 unittest/_torch/sampler/test_return_logits.py SKIP (https://nvbugs/5764627)
 examples/serve/test_serve.py::test_config_file_loading[--config] SKIP (https://nvbugs/5754977)
-full:RTXPro6000D/accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=2-tp2pp2-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] SKIP (https://nvbugspro.nvidia.com/bug/5794313)
+full:RTXPro6000D/accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=2-tp2pp2-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] SKIP (https://nvbugs/5740075)
 examples/test_ray.py::test_ray_disaggregated_serving[tp2] SKIP (https://nvbugs/5612502)
 unittest/executor/test_rpc_proxy.py SKIP (https://nvbugs/5605741)
 unittest/executor/test_rpc_worker.py SKIP (https://nvbugs/5605741)
@@ -454,6 +465,7 @@ full:sm89/accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_
 accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ngram SKIP (https://nvbugs/5768068)
 test_e2e.py::test_eagle3_output_consistency_4gpus[Qwen3/saved_models_Qwen3-235B-A22B_fp8_hf-Qwen3/qwen3-235B-eagle3] SKIP (https://nvbugs/5685010)
 examples/test_mistral.py::test_mistral_with_bf16_lora_torch[mistral-7b-v0.1] SKIP (https://nvbugs/5769855)
+accuracy/test_llm_api_pytorch_multimodal.py::TestMistralLarge3_675B::test_nvfp4_4gpus[latency_moe_trtllm] SKIP (TBD)
 accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-trtllm-fp8] SKIP (https://nvbugs/5772396)
 full:sm100/accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-trtllm-auto] SKIP (https://nvbugs/5772396)
 accuracy/test_llm_api_pytorch.py::TestGLM4_6::test_nvfp4_2_model_mtp[2model_trtllm] SKIP (https://nvbugs/5772360)
@@ -468,13 +480,13 @@ accuracy/test_cli_flow.py::TestVicuna7B::test_eagle_2[cuda_graph=True-chunked_co
 accuracy/test_disaggregated_serving.py::TestGPTOSS::test_auto_dtype[True] SKIP (https://nvbugs/5596343)
 accuracy/test_disaggregated_serving.py::TestGPTOSS::test_auto_dtype[False] SKIP (https://nvbugs/5596343)
 accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_eagle3_tp8[eagle3_one_model=False-torch_compile=True] SKIP (https://nvbugs/5775326)
-accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_nvfp4_tp4[torch_compile=False] SKIP (https://nvbugs/5794796)
 accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_fp8[throughput_latency] SKIP (https://nvbugs/5775544)
 accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B_Instruct_2507::test_skip_softmax_attention[target_sparsity_0.5] SKIP (https://nvbugs/5774869)
 accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B_Instruct_2507::test_skip_softmax_attention[target_sparsity_0.0] SKIP (https://nvbugs/5774869)
 accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B_Instruct_2507::test_skip_softmax_attention[target_sparsity_0.9] SKIP (https://nvbugs/5774869)
 triton_server/test_triton.py::test_llava_onevision[llava_onevision] SKIP (https://nvbugs/5775205)
 triton_server/test_triton.py::test_gpt_ib_lad[gpt-ib-lad] SKIP (https://nvbugs/5775223)
+accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[dep4_latency_moe_trtllm-torch_compile=True] SKIP (https://nvbugs/5740377)
 unittest/_torch/modules/test_fused_moe.py::test_fused_moe_fp8_blockwise_cute_dsl_multi_gpu[MoEWeightLoadingMode.FUSED_GATE_UP_PROJ-DefaultMoeRoutingMethod-1] SKIP (https://nvbugs/5775256)
 unittest/_torch/auto_deploy/unit/multigpu/transformations/library/test_ep_sharding.py::test_ep_shard[3-2] SKIP (https://nvbugs/5777041)
 unittest/_torch/auto_deploy/unit/multigpu/transformations/library/test_ep_sharding.py::test_ep_shard[8-2] SKIP (https://nvbugs/5777041)
@@ -494,6 +506,7 @@ unittest/_torch/modules/test_fused_moe.py::test_fused_moe_multi_gpu[1-CUTLASS] S
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[tp4-fp8kv=False-attn_backend=FLASHINFER-torch_compile=False] SKIP (https://nvbugs/5707359)
 accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen_tp_asymmetric[GSM8K-gen_tp=1-ctx_pp=2] SKIP (https://nvbugs/5673559)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[ep4-mtp_nextn=0-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] SKIP (https://nvbugs/5701445)
+accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[pp4-mtp_nextn=2-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] SKIP (https://nvbugs/5740075)
 accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_guided_decoding[llguidance-mtp_nextn=0] SKIP (https://nvbugs/5748600)
 unittest/_torch/ray_orchestrator/multi_gpu/test_multi_instance.py::test_multi_instance[tp2_2instances] SKIP (https://nvbugs/5784566)
 disaggregated/test_auto_scaling.py::test_worker_restart[etcd-round_robin] SKIP (https://nvbugs/5776445)
diff --git a/tests/unittest/_torch/auto_deploy/_utils_test/_model_test_utils.py b/tests/unittest/_torch/auto_deploy/_utils_test/_model_test_utils.py
index a71a09b4652..af821955d49 100644
--- a/tests/unittest/_torch/auto_deploy/_utils_test/_model_test_utils.py
+++ b/tests/unittest/_torch/auto_deploy/_utils_test/_model_test_utils.py
@@ -278,8 +278,8 @@ def forward(self, x):
 def generate_dynamic_shapes(max_batch_size, max_seq_len):
     dynamic_shapes = (
         {
-            0: Dim.DYNAMIC,
-            1: Dim.DYNAMIC,
+            0: Dim("batch_size", max=max_batch_size),
+            1: Dim("seq_len", max=max_seq_len),
         },
     )
     return dynamic_shapes
@@ -393,7 +393,7 @@ def apply_rotary_pos_emb_ds(q, k, cos, sin, position_ids, unsqueeze_dim=1):
         },
     },
     "meta-llama/Llama-4-Scout-17B-16E-Instruct": {
-        "llm_models_subdir": "llama4-models/Llama-4-Scout-17B-16E-Instruct",
+        "llm_models_subdir": "Llama-4-Scout-17B-16E-Instruct",
         "model_factory": "AutoModelForImageTextToText",
         "model_kwargs": {
             "text_config": {
diff --git a/tests/unittest/_torch/auto_deploy/unit/singlegpu/models/test_hybrid_patches.py b/tests/unittest/_torch/auto_deploy/unit/singlegpu/models/test_hybrid_patches.py
index 430add5a282..6ea5c0efa17 100644
--- a/tests/unittest/_torch/auto_deploy/unit/singlegpu/models/test_hybrid_patches.py
+++ b/tests/unittest/_torch/auto_deploy/unit/singlegpu/models/test_hybrid_patches.py
@@ -72,11 +72,12 @@ def test_bamba_patches(
     position_ids = torch.arange(input_ids.shape[1], device=input_ids.device).repeat(
         input_ids.shape[0], 1
     )
-    batch_size_dynamic = Dim.DYNAMIC
-    seq_len_dynamic = Dim.DYNAMIC
     dynamic_shapes = (
-        {0: batch_size_dynamic, 1: seq_len_dynamic},
-        {0: batch_size_dynamic, 1: seq_len_dynamic},
+        {0: Dim("batch_size", min=0, max=8), 1: Dim("seq_len", min=0, max=512)},
+        {
+            0: Dim("batch_size", min=0, max=8),
+            1: Dim("seq_len", min=0, max=512),
+        },
     )

     def _run_torch_export_to_gm():
diff --git a/tests/unittest/_torch/auto_deploy/unit/singlegpu/models/test_modeling_nemotron_h.py b/tests/unittest/_torch/auto_deploy/unit/singlegpu/models/test_modeling_nemotron_h.py
index d5d624e7211..94b22ed14fc 100644
--- a/tests/unittest/_torch/auto_deploy/unit/singlegpu/models/test_modeling_nemotron_h.py
+++ b/tests/unittest/_torch/auto_deploy/unit/singlegpu/models/test_modeling_nemotron_h.py
@@ -184,11 +184,12 @@ def test_custom_model_implementation_can_be_exported(
     position_ids = torch.arange(input_ids.shape[1], device=input_ids.device).repeat(
         input_ids.shape[0], 1
     )
-    batch_size_dynamic = Dim.DYNAMIC
-    seq_len_dynamic = Dim.DYNAMIC
     dynamic_shapes = (
-        {0: batch_size_dynamic, 1: seq_len_dynamic},
-        {0: batch_size_dynamic, 1: seq_len_dynamic},
+        {0: Dim("batch_size", min=0, max=8), 1: Dim("seq_len", min=0, max=512)},
+        {
+            0: Dim("batch_size", min=0, max=8),
+            1: Dim("seq_len", min=0, max=512),
+        },
     )

     def _run_torch_export_to_gm():
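The hunks above (and the remaining test files below) replace auto-inferred Dim.DYNAMIC placeholders with named, bounded torch.export.Dim objects. A minimal standalone sketch of the two styles, using plain torch.export rather than the repo's torch_export_to_gm wrapper; the toy model and the bounds here are illustrative, not taken from the patch:

    import torch
    from torch.export import Dim

    model = torch.nn.Linear(16, 16)
    x = torch.randn(2, 4, 16)  # example input: (batch, seq, hidden)

    # Dim.DYNAMIC asks the exporter to infer an unbounded symbolic size
    # for each marked dimension.
    ep_auto = torch.export.export(
        model, (x,), dynamic_shapes=({0: Dim.DYNAMIC, 1: Dim.DYNAMIC},)
    )

    # Named dims with explicit ranges, as in the updated tests: the exported
    # program then assumes and guards 1 <= batch_size <= 8 and 1 <= seq_len <= 512.
    dyn = {0: Dim("batch_size", min=1, max=8), 1: Dim("seq_len", min=1, max=512)}
    ep_bounded = torch.export.export(model, (x,), dynamic_shapes=(dyn,))
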
diff --git a/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_attention_matcher.py b/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_attention_matcher.py
index 40a331025a2..c3b18303171 100644
--- a/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_attention_matcher.py
+++ b/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_attention_matcher.py
@@ -84,7 +84,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         return output

     def get_dynamic_shapes(self):
-        return {0: Dim.DYNAMIC, 1: Dim.DYNAMIC}
+        return {0: Dim("batch_size", max=8), 1: Dim("seq_len", min=4, max=16)}


 class RepeatKVModel2(RepeatKVModel):
@@ -185,7 +185,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         return output

     def get_dynamic_shapes(self):
-        return {0: Dim.DYNAMIC, 1: Dim.DYNAMIC}
+        return {0: Dim("batch_size", max=8), 1: Dim("seq_len", min=4, max=16)}


 class ComplexEagerAttentionModel(torch.nn.Module):
@@ -274,7 +274,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         return output

     def get_dynamic_shapes(self):
-        return {0: Dim.DYNAMIC, 1: Dim.DYNAMIC}
+        return {0: Dim("batch_size", max=8), 1: Dim("seq_len", min=4, max=16)}


 class CounterExampleModel(torch.nn.Module):
@@ -329,7 +329,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         return features_case1

     def get_dynamic_shapes(self):
-        return {0: Dim.DYNAMIC, 1: Dim.DYNAMIC}
+        return {0: Dim("batch_size", max=8), 1: Dim("seq_len", min=4, max=16)}


 class GroupedAttentionModel(torch.nn.Module):
@@ -403,7 +403,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         return output

     def get_dynamic_shapes(self):
-        return {0: Dim.DYNAMIC, 1: Dim.DYNAMIC}
+        return {0: Dim("batch_size", max=8), 1: Dim("seq_len", min=4, max=16)}


 def _get_match_repeat_kv_optimizer() -> Callable:
@@ -907,7 +907,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         return output

     def get_dynamic_shapes(self):
-        return {0: Dim.DYNAMIC, 1: Dim.DYNAMIC}
+        return {0: Dim("batch_size", max=8), 1: Dim("seq_len", min=4, max=16)}


 class Llama3CausalAttentionModel(torch.nn.Module):
@@ -1013,7 +1013,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         return output

     def get_dynamic_shapes(self):
-        return {0: Dim.DYNAMIC, 1: Dim.DYNAMIC}
+        return {0: Dim("batch_size", max=8), 1: Dim("seq_len", min=4, max=16)}


 class AttentionLayoutModel(torch.nn.Module):
@@ -1102,7 +1102,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         return output

     def get_dynamic_shapes(self):
-        return {0: Dim.DYNAMIC, 1: Dim.DYNAMIC}
+        return {0: Dim("batch_size", max=8), 1: Dim("seq_len", min=4, max=16)}


 class BsndAttentionModel(AttentionLayoutModel):
diff --git a/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_attention_matcher_hf.py b/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_attention_matcher_hf.py
index 661f1863ee0..3ae7775c6af 100644
--- a/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_attention_matcher_hf.py
+++ b/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_attention_matcher_hf.py
@@ -117,7 +117,7 @@ def verify_matcher(gm: GraphModule):
         "attn_implementation": attn_implementation,
         **config,
     }
-    dynamic_shapes = {0: Dim.DYNAMIC, 1: Dim.DYNAMIC}
+    dynamic_shapes = {0: Dim("batch_size", max=8), 1: Dim("seq_len", min=2, max=8)}

     # Build and export model on meta device
     with init_empty_weights():
diff --git a/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_fuse_rmsnorm.py b/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_fuse_rmsnorm.py
index d354f9d50fe..fe88a866f55 100644
--- a/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_fuse_rmsnorm.py
+++ b/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_fuse_rmsnorm.py
@@ -59,7 +59,7 @@ def checker(gm):
         return any(is_op(n, op) for n in gm.graph.nodes)

     x = torch.randn(2, 1024, device="cuda", dtype=torch.float16)
-    dynamic_shapes = {0: Dim.DYNAMIC}
+    dynamic_shapes = {0: Dim("batch_size", max=8)}
     gm = torch_export_to_gm(model, args=(x,), dynamic_shapes=(dynamic_shapes,), clone=True)
     gm_transformed = InferenceOptimizer(
         None,
diff --git a/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_fused_add_rms_norm.py b/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_fused_add_rms_norm.py
index 6926e980c9d..8cfb59756a5 100644
--- a/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_fused_add_rms_norm.py
+++ b/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_fused_add_rms_norm.py
@@ -37,9 +37,8 @@ def checker(gm):
     residual = torch.randn(bsz, seq_len, hidden, device="cuda", dtype=torch.bfloat16)

     # Dynamic shapes
-    dyn_batch_size = Dim.DYNAMIC
-    ds_x = {0: dyn_batch_size}
-    ds_res = {0: dyn_batch_size}
+    ds_x = {0: Dim("batch_size", max=8)}
+    ds_res = {0: Dim("batch_size", max=8)}

     gm = torch_export_to_gm(model, args=(x, residual), dynamic_shapes=(ds_x, ds_res), clone=True)
diff --git a/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_gather_logits_before_lm_head.py b/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_gather_logits_before_lm_head.py
index abf5d6e1d53..42cd57752c0 100644
--- a/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_gather_logits_before_lm_head.py
+++ b/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_gather_logits_before_lm_head.py
@@ -189,7 +189,7 @@ def test_transform_generate_format(self, batch_size):
         else:
             # dynamic_shapes should be a tuple matching the number of positional args
             dynamic_shapes = (
-                {0: Dim.DYNAMIC},  # hidden_states
+                {0: Dim("batch_size", min=1, max=max_batch_size)},  # hidden_states
                 None,  # logit_gather_ids (static)
                 None,  # seq_len (static)
             )
diff --git a/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_rope_transformation.py b/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_rope_transformation.py
index 291cd377bd9..ffa2c0ccd8f 100644
--- a/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_rope_transformation.py
+++ b/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_rope_transformation.py
@@ -135,7 +135,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         return out.to(torch.float16) if self.mode == "match" else out

     def get_dynamic_shapes(self):
-        return {0: Dim.DYNAMIC, 1: Dim.DYNAMIC}
+        return {0: Dim("batch_size", max=8), 1: Dim("seq_len", max=16)}


 @pytest.mark.parametrize(
@@ -387,7 +387,7 @@ def forward(self, x):
         return torch.cat([q_out, k_out], dim=-1)

     def get_dynamic_shapes(self):
-        return {0: Dim.DYNAMIC, 1: Dim.DYNAMIC}
+        return {0: Dim("batch_size", max=8), 1: Dim("seq_len", max=16)}


 @pytest.mark.parametrize(
diff --git a/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/test_export.py b/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/test_export.py
index 7a5b5e24460..3c28697f3b1 100644
--- a/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/test_export.py
+++ b/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/test_export.py
@@ -47,7 +47,7 @@ def get_sample_input(self):
         return torch.randn(2, 10)

     def get_dynamic_shapes(self):
-        return {0: Dim.DYNAMIC}
+        return {0: Dim("batch_size", max=100)}


 class MLPDuplicate(ModuleForExport):
@@ -72,7 +72,7 @@ def get_deduplicated_keys(self) -> Set[str]:
         return {"fc3.weight"}

     def get_dynamic_shapes(self):
-        return {0: Dim.DYNAMIC}
+        return {0: Dim("batch_size", max=100)}


 class ModuleWithWhere(ModuleForExport):
@@ -90,7 +90,7 @@ def get_sample_input(self):
         return torch.randn(2, 10)

     def get_dynamic_shapes(self):
-        return {0: Dim.DYNAMIC}
+        return {0: Dim("batch_size", max=100)}

     def check_xfail(self, f_export, use_dynamic_shape, device) -> bool:
         return (
@@ -129,7 +129,7 @@ def get_sample_input(self):
         return torch.randn(self.seq_len, self.num_experts)

     def get_dynamic_shapes(self):
-        return {0: Dim.DYNAMIC}
+        return {0: Dim("seq_len", max=100)}

     def check_xfail(self, f_export, use_dynamic_shape, device) -> bool:
         return (
@@ -153,7 +153,7 @@ def get_sample_input(self):
         return torch.randn(2, 10, device=self.fcs[0].weight.device)

     def get_dynamic_shapes(self):
-        return {0: Dim.DYNAMIC}
+        return {0: Dim("batch_size", max=100)}

     def check_xfail(self, f_export, use_dynamic_shape, device) -> bool:
         # non-strict mode only works with our hack in torch_export_to_gm
diff --git a/tests/unittest/_torch/modules/test_fused_moe.py b/tests/unittest/_torch/modules/test_fused_moe.py
index 29be45b8fd2..5a0b641f1b9 100644
--- a/tests/unittest/_torch/modules/test_fused_moe.py
+++ b/tests/unittest/_torch/modules/test_fused_moe.py
@@ -24,7 +24,6 @@
                                      skip_pre_hopper)

 from tensorrt_llm._torch.autotuner import AutoTuner, autotune
-from tensorrt_llm._torch.distributed import MPIDist, TorchDist
 from tensorrt_llm._torch.model_config import ModelConfig
 from tensorrt_llm._torch.modules.fused_moe.fused_moe_cute_dsl import \
     CuteDslFusedMoE
@@ -45,7 +44,7 @@ from tensorrt_llm._torch.modules.fused_moe.fused_moe_triton import \
     IS_TRITON_KERNELS_AVAILABLE
 from tensorrt_llm._torch.modules.gated_mlp import GatedMLP
-from tensorrt_llm._utils import get_sm_version, mpi_disabled, mpi_rank
+from tensorrt_llm._utils import get_sm_version, mpi_rank
 from tensorrt_llm.mapping import Mapping
 from tensorrt_llm.models.modeling_utils import QuantAlgo, QuantConfig
@@ -105,12 +104,6 @@ def test_fused_moe(moe_backend,
     mapping = mapping or Mapping()
     mapping.rank = mpi_rank()
-    if mpi_disabled():
-        dist = TorchDist(mapping=mapping)
-    else:
-        dist = MPIDist(mapping=mapping)
-
-    AutoTuner.get().setup_distributed_state(mapping, dist)

     torch.cuda.set_device(mapping.rank)
diff --git a/tests/unittest/_torch/multimodal/test_mm_encoder_standalone.py b/tests/unittest/_torch/multimodal/test_mm_encoder_standalone.py
index 993559879be..99154dd074a 100644
--- a/tests/unittest/_torch/multimodal/test_mm_encoder_standalone.py
+++ b/tests/unittest/_torch/multimodal/test_mm_encoder_standalone.py
@@ -21,12 +21,10 @@
 _LLAVA_DIR = llm_models_root() / "multimodals" / "llava-v1.6-mistral-7b-hf"
 _QWEN_2_5_VL_DIR = llm_models_root() / "Qwen2.5-VL-3B-Instruct"
-_QWEN_3_VL_DIR = llm_models_root() / "Qwen3" / "Qwen3-VL-2B-Instruct"


 # TODO: Add multi-image in single chat test
-@pytest.mark.parametrize("model_dir",
-                         [_LLAVA_DIR, _QWEN_2_5_VL_DIR, _QWEN_3_VL_DIR])
+@pytest.mark.parametrize("model_dir", [_LLAVA_DIR, _QWEN_2_5_VL_DIR])
 @pytest.mark.parametrize("pd_disagg", [False, True])
 def test_single_image_chat(model_dir, pd_disagg):
     """Test processing single image using encoder (pass mm_embeddings) + LLM API.
@@ -182,7 +180,6 @@ def test_single_image_chat(model_dir, pd_disagg):
         # Qwen2.5 VL's vision encoder seems to output different embeddings based on this value.
         # The test only passes with this set to 1.
         (_QWEN_2_5_VL_DIR, 1),
-        (_QWEN_3_VL_DIR, 3),
     ],
 )
 def test_multi_request_batch_chat(model_dir, encoder_max_batch_size):
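The stacked parametrize decorators above generate the cross product of model directories and pd_disagg flags, so deleting _QWEN_3_VL_DIR also drops both its pd_disagg=False and pd_disagg=True cases. A minimal illustration of the pattern, with string stand-ins instead of the real llm_models_root() paths:

    import pytest

    # Stand-in directory names; the Qwen3-VL entry is omitted, mirroring the patch.
    MODEL_DIRS = ["llava-v1.6-mistral-7b-hf", "Qwen2.5-VL-3B-Instruct"]

    @pytest.mark.parametrize("model_dir", MODEL_DIRS)
    @pytest.mark.parametrize("pd_disagg", [False, True])
    def test_single_image_chat(model_dir, pd_disagg):
        # 2 model dirs x 2 flags -> 4 collected test cases
        assert isinstance(pd_disagg, bool)
        assert model_dir in MODEL_DIRS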