@@ -863,6 +863,10 @@ def test_compare_to_transformers(self, model_arch):
         if model_arch in self.REMOTE_CODE_MODELS:
             model_kwargs = {"trust_remote_code": True}
 
+        # starting from transformers 4.45.0 gemma2 uses eager attention by default, while OV uses sdpa
+        if model_arch == "gemma2" and is_transformers_version(">=", "4.45.0"):
+            model_kwargs["attn_implementation"] = "sdpa"
+
         ov_model = OVModelForCausalLM.from_pretrained(model_id, export=True, ov_config=F32_CONFIG, **model_kwargs)
         self.assertIsInstance(ov_model.config, PretrainedConfig)
         self.assertTrue(ov_model.use_cache)
@@ -1094,6 +1098,10 @@ def test_beam_search(self, model_arch):
10941098                "config" : AutoConfig .from_pretrained (model_id , trust_remote_code = True ),
10951099                "trust_remote_code" : True ,
10961100            }
1101+ 
1102+         # starting from transformers 4.45.0 gemma2 uses eager attention by default, while ov - sdpa 
1103+         if  model_arch  ==  "gemma2"  and  is_transformers_version (">=" , "4.45.0" ):
1104+             model_kwargs ["attn_implementation" ] =  "sdpa" 
10971105        # Qwen tokenizer does not support padding, chatglm, glm4 testing models produce nan that incompatible with beam search 
10981106        if  model_arch  in  ["qwen" , "chatglm" , "glm4" ]:
10991107            return 
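
For reference, the guard added in both hunks can be reproduced in isolation to see what it changes. This is a minimal sketch, not part of the patch: the `optimum.intel.utils.import_utils` import path and the model id are assumptions for illustration. It only demonstrates that the kwarg pins the reference transformers model to sdpa attention so its outputs stay comparable with the exported OpenVINO model.

```python
from optimum.intel.utils.import_utils import is_transformers_version  # assumed helper location

model_kwargs = {}

# Mirror of the guard above: from transformers 4.45.0 gemma2 defaults to eager
# attention, while the exported OpenVINO model behaves like sdpa, so the
# reference model is pinned to sdpa to keep the logits comparison meaningful.
if is_transformers_version(">=", "4.45.0"):
    model_kwargs["attn_implementation"] = "sdpa"

# The kwargs are then forwarded to the reference model, e.g. (model id illustrative):
# transformers_model = AutoModelForCausalLM.from_pretrained("google/gemma-2-2b", **model_kwargs)
print(model_kwargs)
```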