8 changes: 8 additions & 0 deletions thunder/tests/conftest.py
@@ -77,3 +77,11 @@ def pytest_collection_modifyitems(items):

def pytest_addoption(parser):
parser.addoption("--gpu-mem-limit", type=float)


@pytest.fixture
def turn_off_tf32_and_set_seed(monkeypatch):
monkeypatch.setenv("NVIDIA_TF32_OVERRIDE", "0")
torch.manual_seed(42)
yield
torch.seed()
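For context: `NVIDIA_TF32_OVERRIDE=0` disables TF32 kernels at the cuBLAS/cuDNN level, but the variable is read when the CUDA context is initialized, so this fixture only takes effect if it runs before the first CUDA operation in the process. A minimal sketch of an in-process alternative using PyTorch's backend flags; the fixture name below is hypothetical and not part of this PR:

```python
# Sketch only (assumption, not part of this PR): an in-process way to
# disable TF32, useful when the CUDA context may already be initialized
# and NVIDIA_TF32_OVERRIDE would be read too late.
import pytest
import torch


@pytest.fixture
def disable_tf32_via_backends():  # hypothetical name
    prev_matmul = torch.backends.cuda.matmul.allow_tf32
    prev_cudnn = torch.backends.cudnn.allow_tf32
    torch.backends.cuda.matmul.allow_tf32 = False
    torch.backends.cudnn.allow_tf32 = False
    torch.manual_seed(42)
    yield
    # Restore the prior TF32 settings and reseed nondeterministically,
    # mirroring the torch.seed() teardown in the fixture above.
    torch.backends.cuda.matmul.allow_tf32 = prev_matmul
    torch.backends.cudnn.allow_tf32 = prev_cudnn
    torch.seed()
```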
5 changes: 5 additions & 0 deletions thunder/tests/distributed/test_tensor_parallel.py
@@ -131,6 +131,10 @@ def forward(self, x):
actual=tp_jitted_model.get_parameter("embed.weight").grad,
)

# Note: When running with TF32 enabled on CUDA, the maximum absolute difference between outputs
# can be on the order of 1e-3, which exceeds the default tolerances for torch.testing.assert_close.
# This is expected due to the reduced precision of TF32 matrix multiplications.
@pytest.mark.usefixtures("turn_off_tf32_and_set_seed")
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="requires at least 2 CUDA devices")
@common_utils.parametrize("bias", (True, False))
def test_both_column_and_row(self, bias):
@@ -154,6 +158,7 @@ def forward(self, x):
return h

device = torch.device("cuda", self.rank)

x = torch.randint(0, num_embeddings - 1, (16, 16), device=device)
x_ref = x.clone().detach()

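The tolerance issue described in the note above is easy to reproduce outside the test suite. A minimal sketch, assuming an Ampere-or-newer GPU where TF32 is available; the ~1e-3 figure comes from the note, not a hard bound:

```python
import torch

a = torch.randn(1024, 1024, device="cuda")
b = torch.randn(1024, 1024, device="cuda")

torch.backends.cuda.matmul.allow_tf32 = True
tf32_out = a @ b  # TF32 matmul: inputs rounded to a 10-bit mantissa

torch.backends.cuda.matmul.allow_tf32 = False
fp32_out = a @ b  # full float32 matmul

# Typically around 1e-3 on Ampere+, well above assert_close's
# float32 defaults (rtol=1.3e-6, atol=1e-5), hence the fixture.
print((tf32_out - fp32_out).abs().max().item())
```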
5 changes: 4 additions & 1 deletion thunder/tests/test_grad.py
@@ -1487,8 +1487,11 @@ def test_populate_grads_block(executor, device, dtype):
assert_close(torch_grads, thunder_grads, atol=1e-2, rtol=1e-2)


# Note: When running with TF32 enabled on CUDA, the maximum absolute difference between outputs
# can be on the order of 1e-3, which exceeds the default tolerances for torch.testing.assert_close.
# This is expected due to the reduced precision of TF32 matrix multiplications.
@instantiate(dtypes=(thunder.float32,))
def test_populate_grads_nanogpt(executor, device, dtype):
def test_populate_grads_nanogpt(executor, device, dtype, turn_off_tf32_and_set_seed):
import sys

if sys.platform == "win32":
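This PR applies the fixture in two equivalent ways: via `@pytest.mark.usefixtures` on the distributed test above, and via a plain function argument here and in the tests below. Since the fixture yields no value, the two are interchangeable; a minimal sketch with hypothetical test names:

```python
import pytest


@pytest.mark.usefixtures("turn_off_tf32_and_set_seed")
def test_via_marker():
    # Fixture setup/teardown runs, but no fixture value is received.
    ...


def test_via_argument(turn_off_tf32_and_set_seed):
    # Same effect; the argument is None because the fixture yields nothing.
    ...
```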
10 changes: 8 additions & 2 deletions thunder/tests/test_jit_general.py
@@ -649,6 +649,9 @@ def test_nanogpt():
assert_close(result, module(*args, **kwargs))


# Note: When running with TF32 enabled on CUDA, the maximum absolute difference between outputs
# can be on the order of 1e-3, which exceeds the default tolerances for torch.testing.assert_close.
# This is expected due to the reduced precision of TF32 matrix multiplications.
@skipif_not_pytorch_2_1
@pytest.mark.parametrize(
"name",
@@ -668,7 +671,7 @@ def test_nanogpt():
"device",
("cpu", "cuda", "meta"),
)
def test_litgpt_variants(name, device):
def test_litgpt_variants(name, device, turn_off_tf32_and_set_seed):
from thunder.tests.litgpt_model import Config
from litgpt.model import GPT

@@ -704,6 +707,9 @@ def test_litgpt_variants(name, device):
torch.testing.assert_close(param1.grad, param2.grad, rtol=1e-2, atol=1e-2)


# Note: When running with TF32 enabled on CUDA, the maximum absolute difference between outputs
# can be on the order of 1e-3, which exceeds the default tolerances for torch.testing.assert_close.
# This is expected due to the reduced precision of TF32 matrix multiplications.
@skipif_not_pytorch_2_1
@pytest.mark.parametrize(
"name",
@@ -724,7 +730,7 @@ def test_litgpt_variants(name, device):
"device",
("cpu", "cuda"),
)
def test_litgpt_variants_kvcache(name, device):
def test_litgpt_variants_kvcache(name, device, turn_off_tf32_and_set_seed):
from thunder.tests.litgpt_model import Config
from litgpt.model import GPT
import torch._dynamo # this monkeypatches torch.manual_seed
2 changes: 1 addition & 1 deletion thunder/tests/test_networks.py
@@ -36,7 +36,7 @@

# see https://docs.pytest.org/en/stable/how-to/capture-warnings.html#recwarn for the recwarn fixture
@instantiate(dtypes=(thunder.float32,), executors=all_test_executors_and_dynamo)
def test_nanogpt_complete(executor, device, dtype, recwarn):
def test_nanogpt_complete(executor, device, dtype, recwarn, turn_off_tf32_and_set_seed):
tdtype = ttorch.to_torch_dtype(dtype)
make = partial(make_tensor, dtype=torch.int64, device=device)

8 changes: 0 additions & 8 deletions thunder/tests/test_update_aliases.py
@@ -82,14 +82,6 @@ def inplace_masked_fill_sample_generator(op, device, dtype, requires_grad, **kwargs):
_inplace_opinfos.append(inplace_opinfo)


@pytest.fixture
def turn_off_tf32_and_set_seed(monkeypatch):
monkeypatch.setenv("NVIDIA_TF32_OVERRIDE", "0")
torch.manual_seed(42)
yield
torch.seed()


@instantiate(
dtypes=(thunder.float32, thunder.float64),
devicetypes=(devices.DeviceType.CUDA,),