|
18 | 18 | from types import FunctionType
|
19 | 19 | from typing import Any, Callable, Optional, Sequence, Union
|
20 | 20 |
|
| 21 | +import torch |
| 22 | + |
21 | 23 | from lightning.data.streaming.constants import _LIGHTNING_CLOUD_GREATER_EQUAL_0_5_50, _TORCH_GREATER_EQUAL_2_1_0
|
22 | 24 | from lightning.data.streaming.data_processor import DataChunkRecipe, DataProcessor, DataTransformRecipe
|
23 | 25 |
|
    def __init__(self, fn: Callable[[str, Any], None], inputs: Sequence[Any]):
        """Store the user callable and its inputs, and detect device-awareness.

        Args:
            fn: User-provided callable invoked per item. It may optionally accept
                a third ``device`` argument (detected below by signature inspection).
            inputs: Sequence of items the callable will be applied to.
        """
        super().__init__()
        self._fn = fn
        self._inputs = inputs
        # Resolved lazily on the first `prepare_item` call (see `_find_device`);
        # stays None when no GPU / global rank is available.
        self._device: Optional[str] = None

        # Inspect the callable's signature to learn whether it accepts a
        # `device` parameter. Plain functions are inspected directly; callable
        # objects are inspected through their `__call__` method.
        _fn = self._fn if isinstance(self._fn, FunctionType) else self._fn.__call__  # type: ignore
        params = inspect.signature(_fn).parameters
        self._contains_device = "device" in params
|
57 | 64 | def prepare_structure(self, input_dir: Optional[str]) -> Any:
|
58 | 65 | return self._inputs
|
59 | 66 |
|
60 | 67 | def prepare_item(self, output_dir: str, item_metadata: Any) -> None: # type: ignore
|
61 |
| - self._fn(output_dir, item_metadata) |
| 68 | + if self._contains_device and self._device is None: |
| 69 | + self._find_device() |
| 70 | + if isinstance(self._fn, FunctionType): |
| 71 | + if self._contains_device: |
| 72 | + self._fn(output_dir, item_metadata, self._device) |
| 73 | + else: |
| 74 | + self._fn(output_dir, item_metadata) |
| 75 | + elif callable(self._fn): |
| 76 | + if self._contains_device: |
| 77 | + self._fn.__call__(output_dir, item_metadata, self._device) # type: ignore |
| 78 | + else: |
| 79 | + self._fn.__call__(output_dir, item_metadata) # type: ignore |
| 80 | + else: |
| 81 | + raise ValueError(f"The provided {self._fn} isn't supported.") |
| 82 | + |
| 83 | + def _find_device(self) -> None: |
| 84 | + global_rank = os.getenv("DATA_OPTIMIZER_GLOBAL_RANK", None) |
| 85 | + if torch.cuda.is_available() and global_rank: |
| 86 | + num_gpus = torch.cuda.device_count() |
| 87 | + device = int(global_rank) % num_gpus |
| 88 | + self._device = f"cuda:{device}" |
62 | 89 |
|
63 | 90 |
|
64 | 91 | class LambdaDataChunkRecipe(DataChunkRecipe):
|
|
0 commit comments