  7 |   7 |
  8 |   8 | # pyre-strict
  9 |   9 |
 10 |     | -import dataclasses
 11 |     | -import os
 12 |  10 | import unittest
 13 |     | -from collections import defaultdict, namedtuple
 14 |     | -from dataclasses import dataclass
 15 |     | -from typing import Any, Dict
 16 |  11 | from unittest import mock
 17 |     | -from unittest.mock import patch
 18 |  12 |
 19 |  13 | import torch
 20 |  14 | from torchtnt.utils.device import (
 21 |     | -    copy_data_to_device,
 22 |  15 |     get_device_from_env,
 23 |  16 |     get_nvidia_smi_gpu_stats,
 24 |  17 |     get_psutil_cpu_stats,
 25 |     | -    record_data_in_stream,
 26 |     | -    set_float32_precision,
 27 |  18 | )
 28 |     | -from torchtnt.utils.test_utils import skip_if_not_gpu
 29 |  19 |
 30 |  20 |
 31 |  21 | class DeviceTest(unittest.TestCase):
 32 |     | -    @patch("torch.cuda.is_available", return_value=False)
 33 |     | -    def test_get_cpu_device(self, _) -> None:
    |  22 | +    def test_get_cpu_device(self) -> None:
 34 |  23 |         device = get_device_from_env()
 35 |  24 |         self.assertEqual(device.type, "cpu")
 36 |  25 |         self.assertEqual(device.index, None)
 37 |  26 |
 38 |     | -    @skip_if_not_gpu
 39 |     | -    def test_get_gpu_device(self) -> None:
 40 |     | -        device_idx = torch.cuda.device_count() - 1
 41 |     | -        self.assertGreaterEqual(device_idx, 0)
 42 |     | -        with mock.patch.dict(os.environ, {"LOCAL_RANK": str(device_idx)}, clear=True):
 43 |     | -            device = get_device_from_env()
 44 |     | -            self.assertEqual(device.type, "cuda")
 45 |     | -            self.assertEqual(device.index, device_idx)
 46 |     | -            self.assertEqual(device.index, torch.cuda.current_device())
 47 |     | -
 48 |     | -        invalid_device_idx = device_idx + 10
 49 |     | -        with mock.patch.dict(os.environ, {"LOCAL_RANK": str(invalid_device_idx)}):
 50 |     | -            with self.assertRaises(
 51 |     | -                RuntimeError,
 52 |     | -                msg="The local rank is larger than the number of available GPUs",
 53 |     | -            ):
 54 |     | -                device = get_device_from_env()
 55 |     | -
 56 |     | -        # Test that we fall back to 0 if LOCAL_RANK is not specified
 57 |     | -        device = get_device_from_env()
 58 |     | -        self.assertEqual(device.type, "cuda")
 59 |     | -        self.assertEqual(device.index, 0)
 60 |     | -        self.assertEqual(device.index, torch.cuda.current_device())
 61 |     | -
 62 |     | -    @skip_if_not_gpu
 63 |     | -    def test_copy_data_to_device_tensor(self) -> None:
 64 |     | -        cuda_0 = torch.device("cuda:0")
 65 |     | -        a = torch.tensor([1, 2, 3])
 66 |     | -        self.assertEqual(a.device.type, "cpu")
 67 |     | -        a = copy_data_to_device(a, cuda_0)
 68 |     | -        self.assertEqual(a.device.type, "cuda")
 69 |     | -
 70 |     | -    @skip_if_not_gpu
 71 |     | -    def test_copy_data_to_device_module(self) -> None:
 72 |     | -        cuda_0 = torch.device("cuda:0")
 73 |     | -        model = torch.nn.Linear(1, 1)
 74 |     | -        for param in model.parameters():
 75 |     | -            self.assertEqual(param.device.type, "cpu")
 76 |     | -        model = copy_data_to_device(model, cuda_0)
 77 |     | -        for param in model.parameters():
 78 |     | -            self.assertEqual(param.device.type, "cuda")
 79 |     | -
 80 |     | -    @skip_if_not_gpu
 81 |     | -    def test_copy_data_to_device_list(self) -> None:
 82 |     | -        cuda_0 = torch.device("cuda:0")
 83 |     | -        b = torch.tensor([1, 2, 3])
 84 |     | -        c = torch.tensor([4, 5, 6])
 85 |     | -        original_list = [b, c]
 86 |     | -        self.assertEqual(b.device.type, "cpu")
 87 |     | -        self.assertEqual(c.device.type, "cpu")
 88 |     | -        new_list = copy_data_to_device(original_list, cuda_0)
 89 |     | -        for elem in new_list:
 90 |     | -            self.assertEqual(elem.device.type, "cuda")
 91 |     | -
 92 |     | -    @skip_if_not_gpu
 93 |     | -    def test_copy_data_to_device_tuple(self) -> None:
 94 |     | -        cuda_0 = torch.device("cuda:0")
 95 |     | -        d = torch.tensor([1, 2, 3])
 96 |     | -        e = torch.tensor([4, 5, 6])
 97 |     | -        original_tuple = (d, e)
 98 |     | -        self.assertEqual(d.device.type, "cpu")
 99 |     | -        self.assertEqual(e.device.type, "cpu")
100 |     | -        new_tuple = copy_data_to_device(original_tuple, cuda_0)
101 |     | -        for elem in new_tuple:
102 |     | -            self.assertEqual(elem.device.type, "cuda")
103 |     | -
104 |     | -    @skip_if_not_gpu
105 |     | -    def test_copy_data_to_device_dict(self) -> None:
106 |     | -        cuda_0 = torch.device("cuda:0")
107 |     | -        f = torch.tensor([1, 2, 3])
108 |     | -        g = torch.tensor([4, 5, 6])
109 |     | -        original_dict = {"f": f, "g": g}
110 |     | -        self.assertEqual(f.device.type, "cpu")
111 |     | -        self.assertEqual(g.device.type, "cpu")
112 |     | -        new_dict = copy_data_to_device(original_dict, cuda_0)
113 |     | -        for key in new_dict.keys():
114 |     | -            self.assertEqual(new_dict[key].device.type, "cuda")
115 |     | -
116 |     | -    @skip_if_not_gpu
117 |     | -    def test_copy_data_to_device_named_tuple(self) -> None:
118 |     | -        cuda_0 = torch.device("cuda:0")
119 |     | -
120 |     | -        # named tuple of tensors
121 |     | -        h = torch.tensor([1, 2, 3])
122 |     | -        i = torch.tensor([4, 5, 6])
123 |     | -        tensor_tuple = namedtuple("tensor_tuple", ["tensor_a", "tensor_b"])
124 |     | -        original_named_tuple = tensor_tuple(h, i)
125 |     | -        self.assertEqual(h.device.type, "cpu")
126 |     | -        self.assertEqual(i.device.type, "cpu")
127 |     | -        new_named_tuple = copy_data_to_device(original_named_tuple, cuda_0)
128 |     | -        for elem in new_named_tuple:
129 |     | -            self.assertEqual(elem.device.type, "cuda")
130 |     | -
131 |     | -        self.assertIsNotNone(new_named_tuple.tensor_a)
132 |     | -        self.assertIsNotNone(new_named_tuple.tensor_b)
133 |     | -        self.assertEqual(type(original_named_tuple), type(new_named_tuple))
134 |     | -
135 |     | -    @skip_if_not_gpu
136 |     | -    def test_copy_data_to_device_dataclass(self) -> None:
137 |     | -        cuda_0 = torch.device("cuda:0")
138 |     | -
139 |     | -        # dataclass of tensors
140 |     | -        @dataclass
141 |     | -        class TestTensorDataClass:
142 |     | -            val: torch.Tensor
143 |     | -
144 |     | -        original_data_class = TestTensorDataClass(
145 |     | -            val=torch.tensor([1, 2, 3]),
146 |     | -        )
147 |     | -        self.assertEqual(original_data_class.val.device.type, "cpu")
148 |     | -        new_data_class = copy_data_to_device(original_data_class, cuda_0)
149 |     | -        self.assertEqual(new_data_class.val.device.type, "cuda")
150 |     | -
151 |     | -        # frozen dataclass
152 |     | -        @dataclass(frozen=True)
153 |     | -        class FrozenDataClass:
154 |     | -            val: torch.Tensor
155 |     | -
156 |     | -        original_data_class = FrozenDataClass(
157 |     | -            val=torch.tensor([1, 2, 3]),
158 |     | -        )
159 |     | -        self.assertEqual(original_data_class.val.device.type, "cpu")
160 |     | -        new_data_class = copy_data_to_device(original_data_class, cuda_0)
161 |     | -        self.assertEqual(new_data_class.val.device.type, "cuda")
162 |     | -
163 |     | -        # no-init field
164 |     | -        @dataclass
165 |     | -        class NoInitDataClass:
166 |     | -            val: torch.Tensor = dataclasses.field(init=False)
167 |     | -
168 |     | -            def __post_init__(self):
169 |     | -                self.val = torch.tensor([0, 1])
170 |     | -
171 |     | -        original_data_class = NoInitDataClass()
172 |     | -        original_data_class.val = torch.tensor([1, 2])
173 |     | -        self.assertEqual(original_data_class.val.device.type, "cpu")
174 |     | -        new_data_class = copy_data_to_device(original_data_class, cuda_0)
175 |     | -        self.assertEqual(new_data_class.val.device.type, "cuda")
176 |     | -        self.assertTrue(
177 |     | -            torch.equal(new_data_class.val, torch.tensor([1, 2], device=cuda_0))
178 |     | -        )
179 |     | -
180 |     | -    @skip_if_not_gpu
181 |     | -    def test_copy_data_to_device_defaultdict(self) -> None:
182 |     | -        cuda_0 = torch.device("cuda:0")
183 |     | -
184 |     | -        dd = defaultdict(torch.Tensor)
185 |     | -        dd[1] = torch.tensor([1, 2, 3])
186 |     | -        # dd[2] takes the default value, an empty tensor
187 |     | -        _ = dd[2]
188 |     | -
189 |     | -        self.assertEqual(dd[1].device.type, "cpu")
190 |     | -        self.assertEqual(dd[2].device.type, "cpu")
191 |     | -
192 |     | -        new_dd = copy_data_to_device(dd, cuda_0)
193 |     | -
194 |     | -        self.assertEqual(new_dd[1].device, cuda_0)
195 |     | -        self.assertEqual(new_dd[2].device, cuda_0)
196 |     | -
197 |     | -        # make sure the type of new keys is the same
198 |     | -        self.assertEqual(type(dd[3]), type(new_dd[3]))
199 |     | -
200 |     | -    @skip_if_not_gpu
201 |     | -    def test_copy_data_to_device_nested(self) -> None:
202 |     | -        h = torch.tensor([1, 2, 3])
203 |     | -        i = torch.tensor([4, 5, 6])
204 |     | -        j = torch.tensor([7, 8, 9])
205 |     | -        k = torch.tensor([10, 11])
206 |     | -        m = torch.tensor([12, 13])
207 |     | -        n = torch.tensor([14, 15])
208 |     | -        self.assertEqual(h.device.type, "cpu")
209 |     | -        self.assertEqual(i.device.type, "cpu")
210 |     | -        self.assertEqual(j.device.type, "cpu")
211 |     | -        self.assertEqual(k.device.type, "cpu")
212 |     | -        self.assertEqual(m.device.type, "cpu")
213 |     | -        self.assertEqual(n.device.type, "cpu")
214 |     | -
215 |     | -        nested_list = [(h, i), (j, k)]
216 |     | -        nested_dict = {"1": nested_list, "2": [m], "3": n, "4": 2.0, "5": "string"}
217 |     | -
218 |     | -        @dataclass
219 |     | -        class NestedDataClass:
220 |     | -            dict_container: Dict[str, Any]
221 |     | -
222 |     | -        nested_data_class = NestedDataClass(dict_container=nested_dict)
223 |     | -
224 |     | -        cuda_0 = torch.device("cuda:0")
225 |     | -        new_data_class = copy_data_to_device(nested_data_class, cuda_0)
226 |     | -        for val in new_data_class.dict_container.values():
227 |     | -            if isinstance(val, list):
228 |     | -                for list_item in val:
229 |     | -                    if isinstance(list_item, torch.Tensor):
230 |     | -                        self.assertEqual(list_item.device.type, "cuda")
231 |     | -                    if isinstance(list_item, tuple):
232 |     | -                        for tuple_item in list_item:
233 |     | -                            print(tuple_item)
234 |     | -                            self.assertEqual(tuple_item.device.type, "cuda")
235 |     | -            elif isinstance(val, torch.Tensor):
236 |     | -                self.assertEqual(val.device.type, "cuda")
237 |     | -            # check that float is unchanged
238 |     | -            elif isinstance(val, float):
239 |     | -                self.assertEqual(val, 2.0)
240 |     | -            # check that string is unchanged
241 |     | -            elif isinstance(val, str):
242 |     | -                self.assertEqual(val, "string")
243 |     | -
244 |  27 |     def test_get_cpu_stats(self) -> None:
245 |  28 |         """Get CPU stats, check that values are populated."""
246 |  29 |         cpu_stats = get_psutil_cpu_stats()
@@ -275,63 +58,3 @@ def test_get_gpu_stats(self) -> None:
275 |  58 |         self.assertGreaterEqual(gpu_stats["memory_free_mb"], 0)
276 |  59 |         self.assertGreaterEqual(gpu_stats["temperature_gpu_celsius"], 0)
277 |  60 |         self.assertGreaterEqual(gpu_stats["temperature_memory_celsius"], 0)
278 |     | -
279 |     | -    @skip_if_not_gpu
280 |     | -    def test_record_data_in_stream_dict(self) -> None:
281 |     | -        curr_stream = torch.cuda.current_stream()
282 |     | -        a = torch.tensor([1, 2, 3])
283 |     | -        b = torch.tensor([4, 5, 6])
284 |     | -        data = {"a": a, "b": b}
285 |     | -
286 |     | -        with mock.patch.object(
287 |     | -            a, "record_stream"
288 |     | -        ) as mock_record_stream_a, mock.patch.object(
289 |     | -            b, "record_stream"
290 |     | -        ) as mock_record_stream_b:
291 |     | -            record_data_in_stream(data, curr_stream)
292 |     | -            mock_record_stream_a.assert_called_once()
293 |     | -            mock_record_stream_b.assert_called_once()
294 |     | -
295 |     | -    @skip_if_not_gpu
296 |     | -    def test_record_data_in_stream_tuple(self) -> None:
297 |     | -        curr_stream = torch.cuda.current_stream()
298 |     | -        a = torch.tensor([1, 2, 3])
299 |     | -        b = torch.tensor([4, 5, 6])
300 |     | -        data = (a, b)
301 |     | -
302 |     | -        with mock.patch.object(
303 |     | -            a, "record_stream"
304 |     | -        ) as mock_record_stream_a, mock.patch.object(
305 |     | -            b, "record_stream"
306 |     | -        ) as mock_record_stream_b:
307 |     | -            record_data_in_stream(data, curr_stream)
308 |     | -            mock_record_stream_a.assert_called_once()
309 |     | -            mock_record_stream_b.assert_called_once()
310 |     | -
311 |     | -    @skip_if_not_gpu
312 |     | -    def test_record_data_in_stream_list(self) -> None:
313 |     | -        curr_stream = torch.cuda.current_stream()
314 |     | -        a = torch.tensor([1, 2, 3])
315 |     | -        b = torch.tensor([4, 5, 6])
316 |     | -        data = [a, b]
317 |     | -
318 |     | -        with mock.patch.object(
319 |     | -            a, "record_stream"
320 |     | -        ) as mock_record_stream_a, mock.patch.object(
321 |     | -            b, "record_stream"
322 |     | -        ) as mock_record_stream_b:
323 |     | -            record_data_in_stream(data, curr_stream)
324 |     | -            mock_record_stream_a.assert_called_once()
325 |     | -            mock_record_stream_b.assert_called_once()
326 |     | -
327 |     | -    @skip_if_not_gpu
328 |     | -    def test_set_float32_precision(self) -> None:
329 |     | -        set_float32_precision("highest")
330 |     | -        self.assertEqual(torch.get_float32_matmul_precision(), "highest")
331 |     | -        self.assertFalse(torch.backends.cudnn.allow_tf32)
332 |     | -        self.assertFalse(torch.backends.cuda.matmul.allow_tf32)
333 |     | -
334 |     | -        set_float32_precision("high")
335 |     | -        self.assertEqual(torch.get_float32_matmul_precision(), "high")
336 |     | -        self.assertTrue(torch.backends.cudnn.allow_tf32)
337 |     | -        self.assertTrue(torch.backends.cuda.matmul.allow_tf32)