File tree Expand file tree Collapse file tree 1 file changed +3
-2
lines changed
Expand file tree Collapse file tree 1 file changed +3
-2
lines changed Original file line number Diff line number Diff line change 4343from transformers .utils .deprecation import deprecate_kwarg
4444from transformers .utils .generic import OutputRecorder , check_model_inputs
4545from .configuration_doge import DogeConfig
46+ from transformers .models .doge .modeling_doge import DogeAttention
4647
4748try :
4849 from flash_sparse_attn .integrations .flash_sparse_attention import flash_sparse_attention_forward
@@ -372,10 +373,10 @@ class DogePreTrainedModel(PreTrainedModel):
372373 _no_split_modules = ["DogeDecoderLayer" ]
373374 _skip_keys_device_placement = ["past_key_values" ]
374375 _supports_flash_attn = False
375- _supports_sdpa = False
376+ _supports_sdpa = True
376377 _supports_flex_attn = False
377378 _can_compile_fullgraph = False
378- _supports_attention_backend = False
379+ _supports_attention_backend = True
379380 _can_record_outputs = {
380381 "router_logits" : OutputRecorder (DogeCDMoE , index = 1 ),
381382 "hidden_states" : DogeDecoderLayer ,
You can’t perform that action at this time.
0 commit comments