@@ -656,6 +656,9 @@ def _test_e2e_selective_recompute(
 def test_gpt_selective_activation_recompute(dtype, bs, model, fp8, recipe, fp8_model_params):
     if fp8_model_params and NVTE_TEST_NVINSPECT_ENABLED:
         pytest.skip("FP8 parameters are not supported in debug mode.")
+    if (IS_HIP_EXTENSION and get_device_compute_capability() == (9, 5) and
+            dtype in (torch.float16, torch.bfloat16) and rocm_attn_backend()[2]):
+        pytest.skip("Test is not supported on GFX950 with current parameters and CK fused attention backend and non-zero dropout.")
 
     config = model_configs[model]
 
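Note on the pattern: the same four-part guard (HIP build, GFX950 compute capability (9, 5), half-precision dtype, and the CK fused-attention flag from rocm_attn_backend()) recurs in several hunks below. A minimal sketch of a shared helper, assuming the test module's existing IS_HIP_EXTENSION, get_device_compute_capability, and rocm_attn_backend imports, and assuming the third element of the rocm_attn_backend() tuple is truthy when the CK fused-attention backend is selected; the helper name is hypothetical, not part of this patch:

import pytest
import torch

def skip_if_gfx950_ck_fused_attn(dtype):
    # Hypothetical consolidation of the inlined guard above; relies on the
    # test module's IS_HIP_EXTENSION, get_device_compute_capability, and
    # rocm_attn_backend (third flag assumed to mark CK fused attention).
    if (IS_HIP_EXTENSION and get_device_compute_capability() == (9, 5) and
            dtype in (torch.float16, torch.bfloat16) and rocm_attn_backend()[2]):
        pytest.skip(
            "Test is not supported on GFX950 with current parameters and "
            "CK fused attention backend and non-zero dropout."
        )

Each guarded test could then open with skip_if_gfx950_ck_fused_attn(dtype) in place of the inlined condition.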
@@ -775,6 +778,8 @@ def test_gpt_full_activation_recompute(
         and recipe.float8_per_tensor_scaling()
     ):
         pytest.skip("hipBLASLt does not provide suitable algorithms on GFX950 for this config.")
+    if (dtype in (torch.float16, torch.bfloat16) and rocm_attn_backend()[2]):
+        pytest.skip("Test is not supported on GFX950 with current parameters and CK fused attention backend and non-zero dropout.")
 
     config = model_configs[model]
     torch.compiler.reset()  # avoid cache size limit overflow
@@ -926,6 +931,10 @@ def test_gpt_checkpointing(dtype, bs, model):
     config = model_configs[model]
     if not is_fused_attn_available(config, dtype, deterministic=True):
         pytest.skip("No attention backend available.")
+    if (IS_HIP_EXTENSION and get_device_compute_capability() == (9, 5) and
+            dtype in (torch.float16, torch.bfloat16) and rocm_attn_backend()[2]):
+        pytest.skip("Test is not supported on GFX950 with current parameters and CK fused attention backend and non-zero dropout.")
+
     outputs = _test_e2e_checkpointing(bs, dtype, config, checkpoint=False)
     outputs_checkpoint = _test_e2e_checkpointing(bs, dtype, config, checkpoint=True)
931940
@@ -2685,6 +2694,9 @@ def _test_gpt_fp8_parameters(bs, dtype, config, fp8_model_params, recipe):
 def test_gpt_fp8_parameters(dtype, bs, model, recipe):
     if NVTE_TEST_NVINSPECT_ENABLED:
         pytest.skip("FP8 parameters are not supported in debug mode.")
+    if (IS_HIP_EXTENSION and get_device_compute_capability() == (9, 5) and
+            dtype in (torch.float16, torch.bfloat16) and rocm_attn_backend()[2]):
+        pytest.skip("Test is not supported on GFX950 with current parameters and CK fused attention backend and non-zero dropout.")
 
     config = model_configs[model]
26902702
@@ -2972,6 +2984,9 @@ def test_fp8gemm_with_unfused_quantization(N, datatype, input_quantizer, out_qua
         pytest.skip(reason_for_no_fp8)
     if is_mxfp8_needed and not mxfp8_available:
         pytest.skip(reason_for_no_mxfp8)
+    if IS_HIP_EXTENSION and get_device_compute_capability() == (9, 5):
+        if isinstance(out_quantizer, Float8Quantizer):
+            pytest.skip("hipBLASLt does not provide suitable algorithms on GFX950 for this config.")
     inp_fp8 = input_quantizer(torch.randn(N, N, device="cuda", dtype=datatype))
     weight_fp8 = input_quantizer(torch.randn(N, N, device="cuda", dtype=datatype))
     outp_type = torch.float32
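The last hunk gates on the parametrized quantizer rather than on dtype: only runs whose output quantizer is a per-tensor Float8Quantizer are skipped, so other quantization paths (e.g. MXFP8) still execute on GFX950. A hedged sketch of the same check as a reusable guard, again assuming the test module's existing imports (Float8Quantizer, IS_HIP_EXTENSION, get_device_compute_capability); the helper name is hypothetical:

import pytest

def skip_if_gfx950_fp8_out(out_quantizer):
    # Hypothetical guard mirroring the hunk above: per-tensor FP8 outputs have
    # no suitable hipBLASLt algorithm on GFX950, while other quantizers run.
    if IS_HIP_EXTENSION and get_device_compute_capability() == (9, 5):
        if isinstance(out_quantizer, Float8Quantizer):
            pytest.skip("hipBLASLt does not provide suitable algorithms on GFX950 for this config.")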