@@ -649,6 +649,9 @@ def test_nanogpt():
649
649
assert_close (result , module (* args , ** kwargs ))
650
650
651
651
652
+ # Note: When running with TF32 enabled on CUDA, the maximum absolute difference between outputs
653
+ # can be on the order of 1e-3, which exceeds the default tolerances for torch.testing.assert_close.
654
+ # This is expected due to the reduced precision of TF32 matrix multiplications.
652
655
@skipif_not_pytorch_2_1
653
656
@pytest .mark .parametrize (
654
657
"name" ,
@@ -668,7 +671,7 @@ def test_nanogpt():
668
671
"device" ,
669
672
("cpu" , "cuda" , "meta" ),
670
673
)
671
- def test_litgpt_variants (name , device ):
674
+ def test_litgpt_variants (name , device , turn_off_tf32_and_set_seed ):
672
675
from thunder .tests .litgpt_model import Config
673
676
from litgpt .model import GPT
674
677
@@ -704,6 +707,9 @@ def test_litgpt_variants(name, device):
704
707
torch .testing .assert_close (param1 .grad , param2 .grad , rtol = 1e-2 , atol = 1e-2 )
705
708
706
709
710
+ # Note: When running with TF32 enabled on CUDA, the maximum absolute difference between outputs
711
+ # can be on the order of 1e-3, which exceeds the default tolerances for torch.testing.assert_close.
712
+ # This is expected due to the reduced precision of TF32 matrix multiplications.
707
713
@skipif_not_pytorch_2_1
708
714
@pytest .mark .parametrize (
709
715
"name" ,
@@ -724,7 +730,7 @@ def test_litgpt_variants(name, device):
724
730
"device" ,
725
731
("cpu" , "cuda" ),
726
732
)
727
- def test_litgpt_variants_kvcache (name , device ):
733
+ def test_litgpt_variants_kvcache (name , device , turn_off_tf32_and_set_seed ):
728
734
from thunder .tests .litgpt_model import Config
729
735
from litgpt .model import GPT
730
736
import torch ._dynamo # this monkeypatches torch.manual_seed
0 commit comments