-
Notifications
You must be signed in to change notification settings - Fork 300
Description
⚙️ Your current environment
flash-linear-attention 0.4.1
torch 2.8.0+cu126
torchaudio 2.8.0+cu126
torchvision 0.23.0+cu126
triton 3.4.0
transformers 4.57.1
llmcompressor 0.8.2.dev65+gdb0b68d9.d20251128
🐛 Describe the bug
File "/home/jovyan/anaconda3/xyf_swift_1125/lib/python3.12/site-packages/transformers/modeling_layers.py", line 94, in __call__
return super().__call__(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/xyf_swift_1125/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/xyf_swift_1125/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/xyf_swift_1125/lib/python3.12/site-packages/transformers/utils/deprecation.py", line 172, in wrapped_func
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/xyf_swift_1125/lib/python3.12/site-packages/transformers/models/qwen3_next/modeling_qwen3_next.py", line 920, in forward
hidden_states = self.linear_attn(
^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/xyf_swift_1125/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/xyf_swift_1125/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/xyf_swift_1125/lib/python3.12/site-packages/accelerate/hooks.py", line 175, in new_forward
output = module._old_forward(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/xyf_swift_1125/lib/python3.12/site-packages/transformers/models/qwen3_next/modeling_qwen3_next.py", line 739, in forward
core_attn_out, last_recurrent_state = self.chunk_gated_delta_rule(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/xyf_swift_1125/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py", line 929, in _fn
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/xyf_swift_1125/lib/python3.12/site-packages/fla/ops/gated_delta_rule/chunk.py", line 311, in chunk_gated_delta_rule
o, final_state = ChunkGatedDeltaRuleFunction.apply(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/xyf_swift_1125/lib/python3.12/site-packages/torch/autograd/function.py", line 576, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/xyf_swift_1125/lib/python3.12/site-packages/fla/utils.py", line 161, in wrapper
ctx = custom_device_ctx(tensor.device.index)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/xyf_swift_1125/lib/python3.12/site-packages/fla/utils.py", line 462, in custom_device_ctx
return device_torch_lib.device(index)
^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: module 'torch.cpu' has no attribute 'device'. Did you mean: '_device'?
🛠️ Steps to reproduce
No response