@@ -1283,7 +1283,7 @@ def test_deepspeed_load_checkpoint_validate_path(tmp_path):
 
 @RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
 def test_deepspeed_multigpu_stage_3_MiCS_support(tmp_path):
-    """Test to ensure we can use DeepSpeed with basic ZeRO Stage 3 MiCS Support"""
+    """Test to ensure we can use DeepSpeed with basic ZeRO Stage 3 MiCS Support."""
     model = ModelParallelBoringModel()
     strategy = DeepSpeedStrategy(stage=3)
     strategy.config["zero_optimization"]["stage"] = 3
@@ -1302,11 +1302,11 @@ def test_deepspeed_multigpu_stage_3_MiCS_support(tmp_path):
     )
     trainer.test(model)
     trainer.fit(model)
-    
+
     _assert_save_model_is_equal(model, tmp_path, trainer)
     assert isinstance(trainer.strategy, DeepSpeedStrategy)
-    assert 'zero_optimization' in trainer.strategy.config
-    assert trainer.strategy.config["zero_optimization"]["mics_hierarchical_params_gather"] == False
+    assert "zero_optimization" in trainer.strategy.config
+    assert trainer.strategy.config["zero_optimization"]["mics_hierarchical_params_gather"] is False
     assert trainer.strategy.config["zero_optimization"]["mics_shard_size"] == 1
     assert trainer.strategy.config["zero_optimization"]["stage"] == 3
 
@@ -1317,9 +1317,9 @@ def test_deepspeed_multigpu_stage_3_MiCS_offload_param_support(tmp_path):
     However, in past practice, offload param + MiCS + torchrun caused an inner exception in multi-node environments. \
     Probably this exception is caused by torchrun, not DeepSpeed."""
     model = ModelParallelBoringModel()
-    strategy = DeepSpeedStrategy(stage=3,offload_params_device="cpu")
+    strategy = DeepSpeedStrategy(stage=3, offload_params_device="cpu")
     strategy.config["zero_optimization"]["stage"] = 3
-    strategy.config["zero_optimization"]["mics_shard_size"] = 1 
+    strategy.config["zero_optimization"]["mics_shard_size"] = 1
     strategy.config["zero_optimization"]["mics_hierarchical_params_gather"] = False
     trainer = Trainer(
         default_root_dir=tmp_path,
@@ -1336,18 +1336,19 @@ def test_deepspeed_multigpu_stage_3_MiCS_offload_param_support(tmp_path):
 
     _assert_save_model_is_equal(model, tmp_path, trainer)
     assert isinstance(trainer.strategy, DeepSpeedStrategy)
-    assert 'zero_optimization' in trainer.strategy.config
-    assert trainer.strategy.config["zero_optimization"]["mics_hierarchical_params_gather"] == False
+    assert "zero_optimization" in trainer.strategy.config
+    assert trainer.strategy.config["zero_optimization"]["mics_hierarchical_params_gather"] is False
     assert trainer.strategy.config["zero_optimization"]["mics_shard_size"] == 1
     assert trainer.strategy.config["zero_optimization"]["stage"] == 3
 
+
 @RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
 def test_deepspeed_multigpu_stage_3_MiCS_offload_param_optimizer_support(tmp_path):
-    """Test to ensure we can use DeepSpeed with ZeRO Stage param & optimizer offload 3 MiCS Support"""
+    """Test to ensure we can use DeepSpeed ZeRO Stage 3 MiCS support with param & optimizer offload."""
     model = ModelParallelBoringModel()
-    strategy = DeepSpeedStrategy(stage=3,offload_params_device="cpu", offload_optimizer_device="cpu")
+    strategy = DeepSpeedStrategy(stage=3, offload_params_device="cpu", offload_optimizer_device="cpu")
     strategy.config["zero_optimization"]["stage"] = 3
-    strategy.config["zero_optimization"]["mics_shard_size"] = 1 
+    strategy.config["zero_optimization"]["mics_shard_size"] = 1
     strategy.config["zero_optimization"]["mics_hierarchical_params_gather"] = False
     trainer = Trainer(
         default_root_dir=tmp_path,
@@ -1364,23 +1365,24 @@ def test_deepspeed_multigpu_stage_3_MiCS_offload_param_optimizer_support(tmp_path):
 
     _assert_save_model_is_equal(model, tmp_path, trainer)
     assert isinstance(trainer.strategy, DeepSpeedStrategy)
-    assert 'zero_optimization' in trainer.strategy.config
-    assert trainer.strategy.config["zero_optimization"]["mics_hierarchical_params_gather"] == False
+    assert "zero_optimization" in trainer.strategy.config
+    assert trainer.strategy.config["zero_optimization"]["mics_hierarchical_params_gather"] is False
     assert trainer.strategy.config["zero_optimization"]["mics_shard_size"] == 1
     assert trainer.strategy.config["zero_optimization"]["stage"] == 3
 
 
 @RunIf(min_cuda_gpus=4, standalone=True, deepspeed=True)
 def test_deepspeed_multigpu_stage_3_hierarchical_MiCS_support(tmp_path):
-    """Test to ensure we can use DeepSpeed with ZeRO Stage 3 MiCS Support ('mics_hierarchical_params_gather' = True)."""
+    """Test to ensure we can use DeepSpeed with ZeRO Stage 3 MiCS Support ('mics_hierarchical_params_gather' =
+    True)."""
     model = ModelParallelBoringModel()
     strategy = DeepSpeedStrategy(stage=3)
     strategy.config["zero_optimization"]["stage"] = 3
     strategy.config["zero_optimization"]["mics_shard_size"] = 2
     strategy.config["zero_optimization"]["offload_param"] = {}
     strategy.config["zero_optimization"]["offload_optimizer"] = {}
     strategy.config["zero_optimization"]["mics_hierarchical_params_gather"] = True
-    #Forming a 2 x 2 hierarchy
+    # Forming a 2 x 2 hierarchy
     trainer = Trainer(
         default_root_dir=tmp_path,
         strategy=strategy,
@@ -1396,8 +1398,7 @@ def test_deepspeed_multigpu_stage_3_hierarchical_MiCS_support(tmp_path):
 
     _assert_save_model_is_equal(model, tmp_path, trainer)
     assert isinstance(trainer.strategy, DeepSpeedStrategy)
-    assert 'zero_optimization' in trainer.strategy.config
-    assert trainer.strategy.config["zero_optimization"]["mics_hierarchical_params_gather"] == True
+    assert "zero_optimization" in trainer.strategy.config
+    assert trainer.strategy.config["zero_optimization"]["mics_hierarchical_params_gather"] is True
     assert trainer.strategy.config["zero_optimization"]["mics_shard_size"] == 2
     assert trainer.strategy.config["zero_optimization"]["stage"] == 3
-
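
All four tests in this diff exercise the same pattern: construct a stage-3 DeepSpeedStrategy, then patch the MiCS keys (mics_shard_size, mics_hierarchical_params_gather, both real DeepSpeed zero_optimization options) into the strategy's generated config before handing it to the Trainer. A minimal standalone sketch of that pattern follows; this is not code from the PR. The imports assume the unified lightning package (use pytorch_lightning if that is what is installed), and the model, device count, and fast_dev_run flag are illustrative placeholders.

# Sketch: enable DeepSpeed ZeRO Stage 3 MiCS through Lightning's
# DeepSpeedStrategy by editing its config dict in place, mirroring the
# tests above. Assumes `lightning` and `deepspeed` are installed and at
# least 2 CUDA GPUs are visible; BoringModel is a stand-in LightningModule.
from lightning.pytorch import Trainer
from lightning.pytorch.demos.boring_classes import BoringModel
from lightning.pytorch.strategies import DeepSpeedStrategy

strategy = DeepSpeedStrategy(stage=3)  # plain ZeRO Stage 3 as the base config
zero_cfg = strategy.config["zero_optimization"]
zero_cfg["mics_shard_size"] = 2  # shard parameters within groups of 2 ranks
zero_cfg["mics_hierarchical_params_gather"] = False  # flat all-gather; True adds a node-level hierarchy

trainer = Trainer(
    accelerator="cuda",
    devices=2,
    strategy=strategy,
    fast_dev_run=True,  # one train/val batch each, enough to smoke-test the config
)
trainer.fit(BoringModel())

Note the tests mutate strategy.config immediately after construction; that works because DeepSpeedStrategy builds a default config dict (including the "zero_optimization" section) in its constructor when no explicit config is passed.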