-
Notifications
You must be signed in to change notification settings - Fork 3.6k
Add Deepspeed Zero 3 MiCS support #20461
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 6 commits
9a7bf40
2bb1e4a
689d61c
ff1efa0
e66dd11
9ccbb1f
39e1e89
5409bc9
6ca2bac
773001a
87eefdc
50a0af7
b65481e
3e9e0a9
10cdeb4
56c08bc
b4ab477
ec0c04e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -521,12 +521,29 @@ def model_sharded_context(self) -> Generator[None, None, None]: | |||
import deepspeed | ||||
|
||||
self._init_config_if_needed() | ||||
with deepspeed.zero.Init( | ||||
enabled=self.zero_stage_3, | ||||
remote_device=self.remote_device, | ||||
config_dict_or_path=self.config, | ||||
assert self.config is not None | ||||
# If 'mics_shard_size' > 0 is set in config['zero_optimization'], use deepspeed.zero.MiCS_Init() instead of deepspeed.zero.Init() | ||||
hehepig4 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||
# https://deepspeed.readthedocs.io/en/latest/zero3.html#mics-configurations | ||||
# NOTE: the default DeepSpeed version (0.9.0) is not compatible with MiCS | ||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What is the minimum version required to support this? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks for your review. MiCS was implemented after DeepSpeed 0.9.2, and my test environment uses DeepSpeed 0.16.0. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's create another PR that bumps the dependency, and then we can land this piece.
|
||||
if ( | ||||
"zero_optimization" in self.config | ||||
and "mics_shard_size" in self.config["zero_optimization"] | ||||
and self.config["zero_optimization"]["mics_shard_size"] > 0 | ||||
and self.zero_stage_3 | ||||
): | ||||
yield | ||||
with deepspeed.zero.MiCS_Init( | ||||
enabled=self.zero_stage_3, | ||||
remote_device=self.remote_device, | ||||
config_dict_or_path=self.config, | ||||
): | ||||
yield | ||||
else: | ||||
with deepspeed.zero.Init( | ||||
enabled=self.zero_stage_3, | ||||
remote_device=self.remote_device, | ||||
config_dict_or_path=self.config, | ||||
): | ||||
yield | ||||
|
||||
def _set_deepspeed_activation_checkpointing(self) -> None: | ||||
import deepspeed | ||||
|
Uh oh!
There was an error while loading. Please reload this page.