
Commit 2bb1e4a

[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent 9a7bf40 commit 2bb1e4a

2 files changed (+26, -21 lines)

src/lightning/pytorch/strategies/deepspeed.py

Lines changed: 7 additions & 3 deletions
@@ -521,12 +521,16 @@ def model_sharded_context(self) -> Generator[None, None, None]:
         import deepspeed

         self._init_config_if_needed()
-
+
         # If detect 'mics_shard_size'>0 in config['zero_optimization'], alter to use deepspeed.zero.MiCS_Init()
         # https://deepspeed.readthedocs.io/en/latest/zero3.html#mics-configurations
         #! default deepspeed 0.9.0 is not compatible
-        if 'zero_optimization' in self.config and 'mics_shard_size' in self.config['zero_optimization']\
-                and self.config['zero_optimization']['mics_shard_size'] > 0 and self.zero_stage_3:
+        if (
+            "zero_optimization" in self.config
+            and "mics_shard_size" in self.config["zero_optimization"]
+            and self.config["zero_optimization"]["mics_shard_size"] > 0
+            and self.zero_stage_3
+        ):
             with deepspeed.zero.MiCS_Init(
                 enabled=self.zero_stage_3,
                 remote_device=self.remote_device,
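For context, the branch above is driven entirely by the user-supplied strategy config. A minimal sketch of how it is reached from user code, mirroring the tests in this commit (the Trainer arguments are illustrative, not part of the diff):

from lightning.pytorch import Trainer
from lightning.pytorch.strategies import DeepSpeedStrategy

strategy = DeepSpeedStrategy(stage=3)
strategy.config["zero_optimization"]["stage"] = 3
# mics_shard_size > 0 under "zero_optimization" is what the patched
# model_sharded_context() looks for before entering deepspeed.zero.MiCS_Init().
strategy.config["zero_optimization"]["mics_shard_size"] = 1
strategy.config["zero_optimization"]["mics_hierarchical_params_gather"] = False
trainer = Trainer(strategy=strategy, accelerator="gpu", devices=2)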

tests/tests_pytorch/strategies/test_deepspeed.py

Lines changed: 19 additions & 18 deletions
@@ -1283,7 +1283,7 @@ def test_deepspeed_load_checkpoint_validate_path(tmp_path):

 @RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
 def test_deepspeed_multigpu_stage_3_MiCS_support(tmp_path):
-    """Test to ensure we can use DeepSpeed with basic ZeRO Stage 3 MiCS Support"""
+    """Test to ensure we can use DeepSpeed with basic ZeRO Stage 3 MiCS Support."""
     model = ModelParallelBoringModel()
     strategy = DeepSpeedStrategy(stage=3)
     strategy.config["zero_optimization"]["stage"] = 3
@@ -1302,11 +1302,11 @@ def test_deepspeed_multigpu_stage_3_MiCS_support(tmp_path):
     )
     trainer.test(model)
     trainer.fit(model)
-
+
     _assert_save_model_is_equal(model, tmp_path, trainer)
     assert isinstance(trainer.strategy, DeepSpeedStrategy)
-    assert 'zero_optimization' in trainer.strategy.config
-    assert trainer.strategy.config["zero_optimization"]["mics_hierarchical_params_gather"] == False
+    assert "zero_optimization" in trainer.strategy.config
+    assert trainer.strategy.config["zero_optimization"]["mics_hierarchical_params_gather"] is False
     assert trainer.strategy.config["zero_optimization"]["mics_shard_size"] == 1
     assert trainer.strategy.config["zero_optimization"]["stage"] == 3
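A note on the `== False` to `is False` rewrites here and in the hunks below: this matches pycodestyle's E712 rule, which auto-fixing lint hooks (ruff and similar) can apply, and the two forms are not strictly equivalent. `==` compares by value, so the old assertion would also pass if the config stored `0`; `is` requires the `False` singleton itself. A quick illustration, not part of the commit:

value = 0
assert value == False      # passes: 0 compares equal to False by value
assert value is not False  # but 0 is not the bool singleton False
flag = False
assert flag is False       # identity holds only for an actual False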

@@ -1317,9 +1317,9 @@ def test_deepspeed_multigpu_stage_3_MiCS_offload_param_support(tmp_path):
     However, in some past practice, offload param + mics + torchrun will cause inner exception in multi-node environment. \
     Probably this exception is caused by torchrun, not deepspeed. """
     model = ModelParallelBoringModel()
-    strategy = DeepSpeedStrategy(stage=3,offload_params_device="cpu")
+    strategy = DeepSpeedStrategy(stage=3, offload_params_device="cpu")
     strategy.config["zero_optimization"]["stage"] = 3
-    strategy.config["zero_optimization"]["mics_shard_size"] = 1
+    strategy.config["zero_optimization"]["mics_shard_size"] = 1
     strategy.config["zero_optimization"]["mics_hierarchical_params_gather"] = False
     trainer = Trainer(
         default_root_dir=tmp_path,
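If it helps review: `offload_params_device="cpu"` in the constructor is Lightning's shorthand for DeepSpeed's ZeRO parameter offload. Roughly the zero_optimization config this test ends up exercising (a sketch; Lightning assembles the real dict internally):

zero_config = {
    "zero_optimization": {
        "stage": 3,
        "mics_shard_size": 1,
        "mics_hierarchical_params_gather": False,
        "offload_param": {"device": "cpu"},  # from offload_params_device="cpu"
    }
}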
@@ -1336,18 +1336,19 @@ def test_deepspeed_multigpu_stage_3_MiCS_offload_param_support(tmp_path):

     _assert_save_model_is_equal(model, tmp_path, trainer)
     assert isinstance(trainer.strategy, DeepSpeedStrategy)
-    assert 'zero_optimization' in trainer.strategy.config
-    assert trainer.strategy.config["zero_optimization"]["mics_hierarchical_params_gather"] == False
+    assert "zero_optimization" in trainer.strategy.config
+    assert trainer.strategy.config["zero_optimization"]["mics_hierarchical_params_gather"] is False
     assert trainer.strategy.config["zero_optimization"]["mics_shard_size"] == 1
     assert trainer.strategy.config["zero_optimization"]["stage"] == 3

+
 @RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
 def test_deepspeed_multigpu_stage_3_MiCS_offload_param_optimizer_support(tmp_path):
-    """Test to ensure we can use DeepSpeed with ZeRO Stage param & optimizer offload 3 MiCS Support"""
+    """Test to ensure we can use DeepSpeed with ZeRO Stage param & optimizer offload 3 MiCS Support."""
     model = ModelParallelBoringModel()
-    strategy = DeepSpeedStrategy(stage=3,offload_params_device="cpu", offload_optimizer_device="cpu")
+    strategy = DeepSpeedStrategy(stage=3, offload_params_device="cpu", offload_optimizer_device="cpu")
     strategy.config["zero_optimization"]["stage"] = 3
-    strategy.config["zero_optimization"]["mics_shard_size"] = 1
+    strategy.config["zero_optimization"]["mics_shard_size"] = 1
     strategy.config["zero_optimization"]["mics_hierarchical_params_gather"] = False
     trainer = Trainer(
         default_root_dir=tmp_path,
@@ -1364,23 +1365,24 @@ def test_deepspeed_multigpu_stage_3_MiCS_offload_param_optimizer_support(tmp_pat

     _assert_save_model_is_equal(model, tmp_path, trainer)
     assert isinstance(trainer.strategy, DeepSpeedStrategy)
-    assert 'zero_optimization' in trainer.strategy.config
-    assert trainer.strategy.config["zero_optimization"]["mics_hierarchical_params_gather"] == False
+    assert "zero_optimization" in trainer.strategy.config
+    assert trainer.strategy.config["zero_optimization"]["mics_hierarchical_params_gather"] is False
     assert trainer.strategy.config["zero_optimization"]["mics_shard_size"] == 1
     assert trainer.strategy.config["zero_optimization"]["stage"] == 3


 @RunIf(min_cuda_gpus=4, standalone=True, deepspeed=True)
 def test_deepspeed_multigpu_stage_3_hierarchical_MiCS_support(tmp_path):
-    """Test to ensure we can use DeepSpeed with ZeRO Stage 3 MiCS Support ('mics_hierarchical_params_gather' = True)."""
+    """Test to ensure we can use DeepSpeed with ZeRO Stage 3 MiCS Support ('mics_hierarchical_params_gather' =
+    True)."""
     model = ModelParallelBoringModel()
     strategy = DeepSpeedStrategy(stage=3)
     strategy.config["zero_optimization"]["stage"] = 3
     strategy.config["zero_optimization"]["mics_shard_size"] = 2
     strategy.config["zero_optimization"]["offload_param"] = {}
     strategy.config["zero_optimization"]["offload_optimizer"] = {}
     strategy.config["zero_optimization"]["mics_hierarchical_params_gather"] = True
-    #Forming a 2 x 2 hierarchy
+    # Forming a 2 x 2 hierarchy
     trainer = Trainer(
         default_root_dir=tmp_path,
         strategy=strategy,
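The "Forming a 2 x 2 hierarchy" comment describes the rank layout this test needs its 4 GPUs for: with mics_shard_size=2, each parameter shard group spans 2 ranks, and mics_hierarchical_params_gather=True gathers within a group first and then across the 2 groups. A tiny sketch of that grouping (illustrative helper, not from the diff; it assumes consecutive ranks per group):

def mics_groups(world_size: int, shard_size: int) -> list[list[int]]:
    # Partition ranks into consecutive shard groups of size shard_size.
    assert world_size % shard_size == 0
    return [list(range(s, s + shard_size)) for s in range(0, world_size, shard_size)]

print(mics_groups(4, 2))  # [[0, 1], [2, 3]] -> 2 groups x 2 ranks each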
@@ -1396,8 +1398,7 @@ def test_deepspeed_multigpu_stage_3_hierarchical_MiCS_support(tmp_path):

     _assert_save_model_is_equal(model, tmp_path, trainer)
     assert isinstance(trainer.strategy, DeepSpeedStrategy)
-    assert 'zero_optimization' in trainer.strategy.config
-    assert trainer.strategy.config["zero_optimization"]["mics_hierarchical_params_gather"] == True
+    assert "zero_optimization" in trainer.strategy.config
+    assert trainer.strategy.config["zero_optimization"]["mics_hierarchical_params_gather"] is True
     assert trainer.strategy.config["zero_optimization"]["mics_shard_size"] == 2
     assert trainer.strategy.config["zero_optimization"]["stage"] == 3
-