Skip to content

Commit 8bad7b6

Browse files
diego-urgell authored and facebook-github-bot committed
Move device GPU tests to different file (#762)
Summary: Pull Request resolved: #762
Reviewed By: JKSenthil
Differential Revision: D55381991
fbshipit-source-id: 2d654e6c8e63480427a742519847e0a5207eaf48
1 parent b27d916 commit 8bad7b6

File tree

2 files changed

+294
-278
lines changed

2 files changed

+294
-278
lines changed

tests/utils/test_device.py

Lines changed: 1 addition & 278 deletions
Original file line number | Diff line number | Diff line change
@@ -7,240 +7,23 @@
77

88
# pyre-strict
99

10-
import dataclasses
11-
import os
1210
import unittest
13-
from collections import defaultdict, namedtuple
14-
from dataclasses import dataclass
15-
from typing import Any, Dict
1611
from unittest import mock
17-
from unittest.mock import patch
1812

1913
import torch
2014
from torchtnt.utils.device import (
21-
copy_data_to_device,
2215
get_device_from_env,
2316
get_nvidia_smi_gpu_stats,
2417
get_psutil_cpu_stats,
25-
record_data_in_stream,
26-
set_float32_precision,
2718
)
28-
from torchtnt.utils.test_utils import skip_if_not_gpu
2919

3020

3121
class DeviceTest(unittest.TestCase):
32-
@patch("torch.cuda.is_available", return_value=False)
33-
def test_get_cpu_device(self, _) -> None:
22+
def test_get_cpu_device(self) -> None:
3423
device = get_device_from_env()
3524
self.assertEqual(device.type, "cpu")
3625
self.assertEqual(device.index, None)
3726

38-
@skip_if_not_gpu
39-
def test_get_gpu_device(self) -> None:
40-
device_idx = torch.cuda.device_count() - 1
41-
self.assertGreaterEqual(device_idx, 0)
42-
with mock.patch.dict(os.environ, {"LOCAL_RANK": str(device_idx)}, clear=True):
43-
device = get_device_from_env()
44-
self.assertEqual(device.type, "cuda")
45-
self.assertEqual(device.index, device_idx)
46-
self.assertEqual(device.index, torch.cuda.current_device())
47-
48-
invalid_device_idx = device_idx + 10
49-
with mock.patch.dict(os.environ, {"LOCAL_RANK": str(invalid_device_idx)}):
50-
with self.assertRaises(
51-
RuntimeError,
52-
msg="The local rank is larger than the number of available GPUs",
53-
):
54-
device = get_device_from_env()
55-
56-
# Test that we fall back to 0 if LOCAL_RANK is not specified
57-
device = get_device_from_env()
58-
self.assertEqual(device.type, "cuda")
59-
self.assertEqual(device.index, 0)
60-
self.assertEqual(device.index, torch.cuda.current_device())
61-
62-
@skip_if_not_gpu
63-
def test_copy_data_to_device_tensor(self) -> None:
64-
cuda_0 = torch.device("cuda:0")
65-
a = torch.tensor([1, 2, 3])
66-
self.assertEqual(a.device.type, "cpu")
67-
a = copy_data_to_device(a, cuda_0)
68-
self.assertEqual(a.device.type, "cuda")
69-
70-
@skip_if_not_gpu
71-
def test_copy_data_to_device_module(self) -> None:
72-
cuda_0 = torch.device("cuda:0")
73-
model = torch.nn.Linear(1, 1)
74-
for param in model.parameters():
75-
self.assertEqual(param.device.type, "cpu")
76-
model = copy_data_to_device(model, cuda_0)
77-
for param in model.parameters():
78-
self.assertEqual(param.device.type, "cuda")
79-
80-
@skip_if_not_gpu
81-
def test_copy_data_to_device_list(self) -> None:
82-
cuda_0 = torch.device("cuda:0")
83-
b = torch.tensor([1, 2, 3])
84-
c = torch.tensor([4, 5, 6])
85-
original_list = [b, c]
86-
self.assertEqual(b.device.type, "cpu")
87-
self.assertEqual(c.device.type, "cpu")
88-
new_list = copy_data_to_device(original_list, cuda_0)
89-
for elem in new_list:
90-
self.assertEqual(elem.device.type, "cuda")
91-
92-
@skip_if_not_gpu
93-
def test_copy_data_to_device_tuple(self) -> None:
94-
cuda_0 = torch.device("cuda:0")
95-
d = torch.tensor([1, 2, 3])
96-
e = torch.tensor([4, 5, 6])
97-
original_tuple = (d, e)
98-
self.assertEqual(d.device.type, "cpu")
99-
self.assertEqual(e.device.type, "cpu")
100-
new_tuple = copy_data_to_device(original_tuple, cuda_0)
101-
for elem in new_tuple:
102-
self.assertEqual(elem.device.type, "cuda")
103-
104-
@skip_if_not_gpu
105-
def test_copy_data_to_device_dict(self) -> None:
106-
cuda_0 = torch.device("cuda:0")
107-
f = torch.tensor([1, 2, 3])
108-
g = torch.tensor([4, 5, 6])
109-
original_dict = {"f": f, "g": g}
110-
self.assertEqual(f.device.type, "cpu")
111-
self.assertEqual(g.device.type, "cpu")
112-
new_dict = copy_data_to_device(original_dict, cuda_0)
113-
for key in new_dict.keys():
114-
self.assertEqual(new_dict[key].device.type, "cuda")
115-
116-
@skip_if_not_gpu
117-
def test_copy_data_to_device_named_tuple(self) -> None:
118-
cuda_0 = torch.device("cuda:0")
119-
120-
# named tuple of tensors
121-
h = torch.tensor([1, 2, 3])
122-
i = torch.tensor([4, 5, 6])
123-
tensor_tuple = namedtuple("tensor_tuple", ["tensor_a", "tensor_b"])
124-
original_named_tuple = tensor_tuple(h, i)
125-
self.assertEqual(h.device.type, "cpu")
126-
self.assertEqual(i.device.type, "cpu")
127-
new_named_tuple = copy_data_to_device(original_named_tuple, cuda_0)
128-
for elem in new_named_tuple:
129-
self.assertEqual(elem.device.type, "cuda")
130-
131-
self.assertIsNotNone(new_named_tuple.tensor_a)
132-
self.assertIsNotNone(new_named_tuple.tensor_b)
133-
self.assertEqual(type(original_named_tuple), type(new_named_tuple))
134-
135-
@skip_if_not_gpu
136-
def test_copy_data_to_device_dataclass(self) -> None:
137-
cuda_0 = torch.device("cuda:0")
138-
139-
# dataclass of tensors
140-
@dataclass
141-
class TestTensorDataClass:
142-
val: torch.Tensor
143-
144-
original_data_class = TestTensorDataClass(
145-
val=torch.tensor([1, 2, 3]),
146-
)
147-
self.assertEqual(original_data_class.val.device.type, "cpu")
148-
new_data_class = copy_data_to_device(original_data_class, cuda_0)
149-
self.assertEqual(new_data_class.val.device.type, "cuda")
150-
151-
# frozen dataclass
152-
@dataclass(frozen=True)
153-
class FrozenDataClass:
154-
val: torch.Tensor
155-
156-
original_data_class = FrozenDataClass(
157-
val=torch.tensor([1, 2, 3]),
158-
)
159-
self.assertEqual(original_data_class.val.device.type, "cpu")
160-
new_data_class = copy_data_to_device(original_data_class, cuda_0)
161-
self.assertEqual(new_data_class.val.device.type, "cuda")
162-
163-
# no-init field
164-
@dataclass
165-
class NoInitDataClass:
166-
val: torch.Tensor = dataclasses.field(init=False)
167-
168-
def __post_init__(self):
169-
self.val = torch.tensor([0, 1])
170-
171-
original_data_class = NoInitDataClass()
172-
original_data_class.val = torch.tensor([1, 2])
173-
self.assertEqual(original_data_class.val.device.type, "cpu")
174-
new_data_class = copy_data_to_device(original_data_class, cuda_0)
175-
self.assertEqual(new_data_class.val.device.type, "cuda")
176-
self.assertTrue(
177-
torch.equal(new_data_class.val, torch.tensor([1, 2], device=cuda_0))
178-
)
179-
180-
@skip_if_not_gpu
181-
def test_copy_data_to_device_defaultdict(self) -> None:
182-
cuda_0 = torch.device("cuda:0")
183-
184-
dd = defaultdict(torch.Tensor)
185-
dd[1] = torch.tensor([1, 2, 3])
186-
# dd[2] takes the default value, an empty tensor
187-
_ = dd[2]
188-
189-
self.assertEqual(dd[1].device.type, "cpu")
190-
self.assertEqual(dd[2].device.type, "cpu")
191-
192-
new_dd = copy_data_to_device(dd, cuda_0)
193-
194-
self.assertEqual(new_dd[1].device, cuda_0)
195-
self.assertEqual(new_dd[2].device, cuda_0)
196-
197-
# make sure the type of new keys is the same
198-
self.assertEqual(type(dd[3]), type(new_dd[3]))
199-
200-
@skip_if_not_gpu
201-
def test_copy_data_to_device_nested(self) -> None:
202-
h = torch.tensor([1, 2, 3])
203-
i = torch.tensor([4, 5, 6])
204-
j = torch.tensor([7, 8, 9])
205-
k = torch.tensor([10, 11])
206-
m = torch.tensor([12, 13])
207-
n = torch.tensor([14, 15])
208-
self.assertEqual(h.device.type, "cpu")
209-
self.assertEqual(i.device.type, "cpu")
210-
self.assertEqual(j.device.type, "cpu")
211-
self.assertEqual(k.device.type, "cpu")
212-
self.assertEqual(m.device.type, "cpu")
213-
self.assertEqual(n.device.type, "cpu")
214-
215-
nested_list = [(h, i), (j, k)]
216-
nested_dict = {"1": nested_list, "2": [m], "3": n, "4": 2.0, "5": "string"}
217-
218-
@dataclass
219-
class NestedDataClass:
220-
dict_container: Dict[str, Any]
221-
222-
nested_data_class = NestedDataClass(dict_container=nested_dict)
223-
224-
cuda_0 = torch.device("cuda:0")
225-
new_data_class = copy_data_to_device(nested_data_class, cuda_0)
226-
for val in new_data_class.dict_container.values():
227-
if isinstance(val, list):
228-
for list_item in val:
229-
if isinstance(list_item, torch.Tensor):
230-
self.assertEqual(list_item.device.type, "cuda")
231-
if isinstance(list_item, tuple):
232-
for tuple_item in list_item:
233-
print(tuple_item)
234-
self.assertEqual(tuple_item.device.type, "cuda")
235-
elif isinstance(val, torch.Tensor):
236-
self.assertEqual(val.device.type, "cuda")
237-
# check that float is unchanged
238-
elif isinstance(val, float):
239-
self.assertEqual(val, 2.0)
240-
# check that string is unchanged
241-
elif isinstance(val, str):
242-
self.assertEqual(val, "string")
243-
24427
def test_get_cpu_stats(self) -> None:
24528
"""Get CPU stats, check that values are populated."""
24629
cpu_stats = get_psutil_cpu_stats()
@@ -275,63 +58,3 @@ def test_get_gpu_stats(self) -> None:
27558
self.assertGreaterEqual(gpu_stats["memory_free_mb"], 0)
27659
self.assertGreaterEqual(gpu_stats["temperature_gpu_celsius"], 0)
27760
self.assertGreaterEqual(gpu_stats["temperature_memory_celsius"], 0)
278-
279-
@skip_if_not_gpu
280-
def test_record_data_in_stream_dict(self) -> None:
281-
curr_stream = torch.cuda.current_stream()
282-
a = torch.tensor([1, 2, 3])
283-
b = torch.tensor([4, 5, 6])
284-
data = {"a": a, "b": b}
285-
286-
with mock.patch.object(
287-
a, "record_stream"
288-
) as mock_record_stream_a, mock.patch.object(
289-
b, "record_stream"
290-
) as mock_record_stream_b:
291-
record_data_in_stream(data, curr_stream)
292-
mock_record_stream_a.assert_called_once()
293-
mock_record_stream_b.assert_called_once()
294-
295-
@skip_if_not_gpu
296-
def test_record_data_in_stream_tuple(self) -> None:
297-
curr_stream = torch.cuda.current_stream()
298-
a = torch.tensor([1, 2, 3])
299-
b = torch.tensor([4, 5, 6])
300-
data = (a, b)
301-
302-
with mock.patch.object(
303-
a, "record_stream"
304-
) as mock_record_stream_a, mock.patch.object(
305-
b, "record_stream"
306-
) as mock_record_stream_b:
307-
record_data_in_stream(data, curr_stream)
308-
mock_record_stream_a.assert_called_once()
309-
mock_record_stream_b.assert_called_once()
310-
311-
@skip_if_not_gpu
312-
def test_record_data_in_stream_list(self) -> None:
313-
curr_stream = torch.cuda.current_stream()
314-
a = torch.tensor([1, 2, 3])
315-
b = torch.tensor([4, 5, 6])
316-
data = [a, b]
317-
318-
with mock.patch.object(
319-
a, "record_stream"
320-
) as mock_record_stream_a, mock.patch.object(
321-
b, "record_stream"
322-
) as mock_record_stream_b:
323-
record_data_in_stream(data, curr_stream)
324-
mock_record_stream_a.assert_called_once()
325-
mock_record_stream_b.assert_called_once()
326-
327-
@skip_if_not_gpu
328-
def test_set_float32_precision(self) -> None:
329-
set_float32_precision("highest")
330-
self.assertEqual(torch.get_float32_matmul_precision(), "highest")
331-
self.assertFalse(torch.backends.cudnn.allow_tf32)
332-
self.assertFalse(torch.backends.cuda.matmul.allow_tf32)
333-
334-
set_float32_precision("high")
335-
self.assertEqual(torch.get_float32_matmul_precision(), "high")
336-
self.assertTrue(torch.backends.cudnn.allow_tf32)
337-
self.assertTrue(torch.backends.cuda.matmul.allow_tf32)

0 commit comments

Comments (0)