Skip to content

Commit 19dc3e8

Browse files
committed
v0.37.1
See https://github.com/quic/ai-hub-models/releases/v0.37.1 for changelog. Signed-off-by: QAIHM Team <[email protected]>
1 parent 660512a commit 19dc3e8

File tree

17 files changed

+49
-42
lines changed

17 files changed

+49
-42
lines changed

qai_hub_models/_version.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,4 +3,4 @@
33
# SPDX-License-Identifier: BSD-3-Clause
44
# ---------------------------------------------------------------------
55

6-
__version__ = "0.37.0"
6+
__version__ = "0.37.1"

qai_hub_models/global_requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ filelock>=3.16.1
2828
ftfy==6.1.1
2929
gdown==4.7.1
3030
gitpython==3.1.42
31-
huggingface_hub>=0.23.1,<1.0
31+
huggingface_hub>=0.34.0,<1.0
3232
hydra-core==1.3.0
3333
imageio[ffmpeg]==2.31.5
3434
imagesize==1.4.1

qai_hub_models/models/_shared/llm/export_helpers.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -491,8 +491,8 @@ def fetch_context_binaries(
491491
# Download each component's context binary.
492492
for component in components:
493493
link_job = component.link_job
494-
assert link_job is not None and link_job.get_status().success
495-
target_model_filename = f"{model_name}_{component.name}.bin"
494+
assert link_job is not None and link_job.wait().success
495+
target_model_filename = f"{model_name}_{component.name(len(components))}.bin"
496496
target_model_list.append(target_model_filename)
497497
cast(hub.Model, link_job.get_target_model()).download(
498498
str(output_path / target_model_filename)
@@ -522,7 +522,7 @@ def print_subcomponent_profile_metrics(
522522
AssertionError if the profile job failed.
523523
"""
524524
profile_job = component.subcomponent_profile_job[instantiation_type]
525-
if not profile_job.get_status().success:
525+
if not profile_job.wait().success:
526526
print(
527527
f"Profile job for {component.subcomponent_name(instantiation_type, num_components=num_components)} failed:\n"
528528
f" {profile_job.get_status().message}"

qai_hub_models/models/_shared/stable_diffusion/model.py

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,12 @@
55

66
from __future__ import annotations
77

8+
from typing import TYPE_CHECKING
9+
810
# isort: off
911
# This verifies aimet is installed, and this must be included first.
1012
from qai_hub_models.utils.quantization_aimet_onnx import (
1113
AIMETOnnxQuantizableMixin,
12-
ensure_max_aimet_onnx_version,
1314
)
1415

1516
# isort: on
@@ -19,9 +20,11 @@
1920

2021
import diffusers
2122
import torch
22-
from aimet_common.defs import QuantScheme
23-
from aimet_onnx.quantsim import QuantizationSimModel as QuantSimOnnx
24-
from aimet_onnx.quantsim import load_encodings_to_sim
23+
24+
if TYPE_CHECKING:
25+
from aimet_onnx.quantsim import QuantizationSimModel as QuantSimOnnx
26+
27+
2528
from diffusers import AutoencoderKL, UNet2DConditionModel
2629
from diffusers.schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME
2730
from huggingface_hub import hf_hub_download
@@ -48,9 +51,9 @@
4851
)
4952
from qai_hub_models.utils.input_spec import InputSpec
5053
from qai_hub_models.utils.qai_hub_helpers import ensure_v73_or_later
54+
from qai_hub_models.utils.quantization_aimet_onnx import ensure_max_aimet_onnx_version
5155

5256
MAX_AIMET_ONNX_VERSION = "2.6.0"
53-
ensure_max_aimet_onnx_version(MAX_AIMET_ONNX_VERSION)
5457

5558

5659
class TextEncoderBase(BaseModel, FromPretrainedMixin):
@@ -108,6 +111,11 @@ def from_pretrained(
108111
Create AimetQuantSim from checkpoint. QuantSim is calibrated if the
109112
checkpoint is an AIMET_ONNX_EXPORT or DEFAULT
110113
"""
114+
ensure_max_aimet_onnx_version(MAX_AIMET_ONNX_VERSION, cls.model_id)
115+
from aimet_common.defs import QuantScheme
116+
from aimet_onnx.quantsim import QuantizationSimModel as QuantSimOnnx
117+
from aimet_onnx.quantsim import load_encodings_to_sim
118+
111119
host_device = torch.device(host_device)
112120
subfolder = subfolder or cls.default_subfolder
113121
onnx_model, aimet_encodings = cls.onnx_from_pretrained(
@@ -227,6 +235,11 @@ def from_pretrained(
227235
Create AimetQuantSim from checkpoint. QuantSim is calibrated if the
228236
checkpoint is an AIMET_ONNX_EXPORT or DEFAULT
229237
"""
238+
ensure_max_aimet_onnx_version(MAX_AIMET_ONNX_VERSION, cls.model_id)
239+
from aimet_common.defs import QuantScheme
240+
from aimet_onnx.quantsim import QuantizationSimModel as QuantSimOnnx
241+
from aimet_onnx.quantsim import load_encodings_to_sim
242+
230243
host_device = torch.device(host_device)
231244
subfolder = subfolder or cls.default_subfolder
232245
onnx_model, aimet_encodings = cls.onnx_from_pretrained(
@@ -324,6 +337,11 @@ def from_pretrained(
324337
Create AimetQuantSim from checkpoint. QuantSim is calibrated if the
325338
checkpoint is an AIMET_ONNX_EXPORT or DEFAULT
326339
"""
340+
ensure_max_aimet_onnx_version(MAX_AIMET_ONNX_VERSION, cls.model_id)
341+
from aimet_common.defs import QuantScheme
342+
from aimet_onnx.quantsim import QuantizationSimModel as QuantSimOnnx
343+
from aimet_onnx.quantsim import load_encodings_to_sim
344+
327345
host_device = torch.device(host_device)
328346
subfolder = subfolder or cls.default_subfolder
329347
onnx_model, aimet_encodings = cls.onnx_from_pretrained(

qai_hub_models/models/falcon_v3_7b_instruct/requirements.txt

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
aimet-onnx==2.10.0; sys_platform == 'linux' and python_version == "3.10"
22
transformers==4.45.0
3-
huggingface_hub==0.23.2
43
sentencepiece==0.2.0
54
psutil
65
onnx==1.16.2

qai_hub_models/models/llama_v3_1_8b_instruct/requirements.txt

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
aimet-onnx==2.10.0; sys_platform == 'linux' and python_version == "3.10"
22
transformers==4.45.0
3-
huggingface_hub==0.23.2
43
sentencepiece==0.2.0
54
psutil
65
onnx==1.16.2

qai_hub_models/models/llama_v3_1_sea_lion_3_5_8b_r/requirements.txt

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
aimet-onnx==2.10.0; sys_platform == 'linux' and python_version == "3.10"
22
transformers==4.45.0
3-
huggingface_hub==0.23.2
43
sentencepiece==0.2.0
54
psutil
65
onnx==1.16.2

qai_hub_models/models/llama_v3_2_1b_instruct/requirements.txt

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
aimet-onnx==2.10.0; sys_platform == 'linux' and python_version == "3.10"
22
transformers==4.45.0
3-
huggingface_hub==0.23.2
43
sentencepiece==0.2.0
54
psutil
65
onnx==1.16.2

qai_hub_models/models/llama_v3_2_3b_instruct/requirements.txt

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
aimet-onnx==2.10.0; sys_platform == 'linux' and python_version == "3.10"
22
transformers==4.45.0
3-
huggingface_hub==0.23.2
43
sentencepiece==0.2.0
54
psutil
65
onnx==1.16.2

qai_hub_models/models/llama_v3_8b_instruct/requirements.txt

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
aimet-onnx==2.10.0; sys_platform == 'linux' and python_version == "3.10"
22
transformers==4.45.0
3-
huggingface_hub==0.23.2
43
sentencepiece==0.2.0
54
psutil
65
onnx==1.16.2

0 commit comments

Comments (0)