Merged
13 changes: 7 additions & 6 deletions src/transformers/cache_utils.py
@@ -16,9 +16,6 @@
)


if _is_quanto_greater_than_0_2_5 := is_quanto_greater("0.2.5", accept_dev=True):
from optimum.quanto import MaxOptimizer, qint2, qint4, quantize_weight

Comment on lines -19 to -21
Member:
I believe those can stay at the top! It's cleaner to have them here!

Contributor Author:

After fixing the _is_package_available call, if you leave the import at the top, everything crashes due to a circular import:

>>> from transformers import QuantoQuantizedCache
Traceback (most recent call last):
  File "<python-input-0>", line 1, in <module>
    from transformers import QuantoQuantizedCache
  File "/home/manuel_deprada/transformers/src/transformers/utils/import_utils.py", line 2306, in __getattr__
    module = self._get_module(self._class_to_module[name])
  File "/home/manuel_deprada/transformers/src/transformers/utils/import_utils.py", line 2336, in _get_module
    raise e
  File "/home/manuel_deprada/transformers/src/transformers/utils/import_utils.py", line 2334, in _get_module
    return importlib.import_module("." + module_name, self.__name__)
           ~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/raid/manuel/micromamba/envs/py313/lib/python3.13/importlib/__init__.py", line 88, in import_module
    return _bootstrap._gcd_import(name[level:], package, level)
           ~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/manuel_deprada/transformers/src/transformers/cache_utils.py", line 19, in <module>
    from optimum.quanto import MaxOptimizer, qint2, qint4, quantize_weight
  File "/raid/manuel/micromamba/envs/py313/lib/python3.13/site-packages/optimum/quanto/__init__.py", line 19, in <module>
    from .models import *
  File "/raid/manuel/micromamba/envs/py313/lib/python3.13/site-packages/optimum/quanto/models/__init__.py", line 30, in <module>
    from .transformers_models import *
  File "/raid/manuel/micromamba/envs/py313/lib/python3.13/site-packages/optimum/quanto/models/transformers_models.py", line 33, in <module>
    from transformers import AutoConfig, AutoModelForCausalLM, PreTrainedModel
  File "/home/manuel_deprada/transformers/src/transformers/utils/import_utils.py", line 2306, in __getattr__
    module = self._get_module(self._class_to_module[name])
  File "/home/manuel_deprada/transformers/src/transformers/utils/import_utils.py", line 2336, in _get_module
    raise e
  File "/home/manuel_deprada/transformers/src/transformers/utils/import_utils.py", line 2334, in _get_module
    return importlib.import_module("." + module_name, self.__name__)
           ~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/raid/manuel/micromamba/envs/py313/lib/python3.13/importlib/__init__.py", line 88, in import_module
    return _bootstrap._gcd_import(name[level:], package, level)
           ~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/manuel_deprada/transformers/src/transformers/models/auto/modeling_auto.py", line 23, in <module>
    from .auto_factory import (
    ...<4 lines>...
    )
  File "/home/manuel_deprada/transformers/src/transformers/models/auto/auto_factory.py", line 43, in <module>
    from ...generation import GenerationMixin
  File "/home/manuel_deprada/transformers/src/transformers/utils/import_utils.py", line 2306, in __getattr__
    module = self._get_module(self._class_to_module[name])
  File "/home/manuel_deprada/transformers/src/transformers/utils/import_utils.py", line 2336, in _get_module
    raise e
  File "/home/manuel_deprada/transformers/src/transformers/utils/import_utils.py", line 2334, in _get_module
    return importlib.import_module("." + module_name, self.__name__)
           ~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/raid/manuel/micromamba/envs/py313/lib/python3.13/importlib/__init__.py", line 88, in import_module
    return _bootstrap._gcd_import(name[level:], package, level)
           ~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/manuel_deprada/transformers/src/transformers/generation/utils.py", line 31, in <module>
    from ..cache_utils import (
    ...<6 lines>...
    )
ImportError: cannot import name 'Cache' from partially initialized module 'transformers.cache_utils' (most likely due to a circular import) (/home/manuel_deprada/transformers/src/transformers/cache_utils.py)
>>> 
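For reference, this failure mode reduces to a minimal two-module cycle (hypothetical pkg/a.py and pkg/b.py, not code from either repository):

# pkg/a.py
from pkg.b import helper  # executing a.py starts importing b.py...

def thing():
    return helper()

# pkg/b.py
from pkg.a import thing  # ...which imports a.py again while it is still only
                         # partially initialized, so 'thing' does not exist yet
                         # and Python raises "cannot import name 'thing' from
                         # partially initialized module 'pkg.a'"

def helper():
    return 42

Moving one of the two imports inside a function body breaks the cycle, which is what this PR does with the optimum.quanto import below.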

Member:

Oh woww, they import us as well... I see, thanks!

if is_hqq_available():
from hqq.core.quantize import Quantizer as HQQQuantizer

@@ -558,7 +555,7 @@ def __init__(
q_group_size: int = 64,
residual_length: int = 128,
):
super().__init__(self)
super().__init__()
self.nbits = nbits
self.axis_key = axis_key
self.axis_value = axis_value
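A side note on the one-line fix above: super().__init__(self) passes the instance twice, once implicitly through the bound super() object and once explicitly as an argument, so the base initializer receives an unexpected extra positional argument. A minimal illustration with hypothetical classes, not the ones in this file:

class Base:
    def __init__(self):
        pass

class Child(Base):
    def __init__(self):
        super().__init__(self)  # TypeError: Base.__init__() takes 1 positional
                                # argument but 2 were given

Child()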
@@ -635,10 +632,12 @@ def __init__(
residual_length=residual_length,
)

if not _is_quanto_greater_than_0_2_5:
# We need to import quanto here to avoid circular imports due to optimum/quanto/models/transformers_models.py
if is_quanto_greater("0.2.5", accept_dev=True):
from optimum.quanto import MaxOptimizer, qint2, qint4
else:
raise ImportError(
"You need optimum-quanto version 0.2.5 or greater to use `QuantoQuantizedCache`."
)

if self.nbits not in [2, 4]:
Expand All @@ -656,6 +655,8 @@ def __init__(
self.optimizer = MaxOptimizer() # hardcode as it's the only one for per-channel quantization

def _quantize(self, tensor, axis):
from optimum.quanto import quantize_weight

scale, zeropoint = self.optimizer(tensor, self.qtype, axis, self.q_group_size)
qtensor = quantize_weight(tensor, self.qtype, axis, scale, zeropoint, self.q_group_size)
return qtensor
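For readers arriving from the docs, a hedged usage sketch of the quantized cache through generate(); the model name is illustrative and the exact cache_config schema may vary across transformers versions:

from transformers import AutoModelForCausalLM, AutoTokenizer

tok = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")
inputs = tok("Hello", return_tensors="pt")

# Requires optimum-quanto >= 0.2.5 installed, per the version check added above.
out = model.generate(
    **inputs,
    max_new_tokens=20,
    cache_implementation="quantized",
    cache_config={"backend": "quanto", "nbits": 4},
)
print(tok.decode(out[0]))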
2 changes: 1 addition & 1 deletion src/transformers/utils/import_utils.py
@@ -1286,7 +1286,7 @@ def is_quanto_greater(library_version: str, accept_dev: bool = False):
given version. If `accept_dev` is True, it will also accept development versions (e.g. 2.7.0.dev20250320 matches
2.7.0).
"""
if not _is_package_available("optimum-quanto"):
if not _is_package_available("optimum.quanto"):
Member:

Are you sure here? It was with a dash before

Contributor Author (@manueldeprada), Aug 12, 2025:

yes, look:

[screenshot]

Member:

Alright then!

return False

if accept_dev:
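The distinction behind this one-character fix: optimum-quanto is the PyPI distribution name, while optimum.quanto is the importable module path. Assuming _is_package_available resolves names through the import system (importlib.util.find_spec), only the dotted form can ever succeed; the dashed form is visible only to the packaging metadata. A sketch of the difference with plain importlib:

import importlib.metadata
import importlib.util

# Module path: resolvable by the import system (what the fixed check uses).
print(importlib.util.find_spec("optimum.quanto") is not None)  # True if installed

# Distribution name: unknown to the import system, so this is always None...
print(importlib.util.find_spec("optimum-quanto"))  # None

# ...but it is what the packaging metadata knows about.
print(importlib.metadata.version("optimum-quanto"))  # e.g. "0.2.5"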