Skip to content

Commit a34e9bf

Browse files
authored
🖨 Add Script Utilities section to the documentation (huggingface#2407)
* Add script_utils.md to the documentation * Refactor ScriptArguments class documentation * Refactor TrlParser class to improve code organization and readability
1 parent c10cc89 commit a34e9bf

File tree

4 files changed

+45
-27
lines changed

4 files changed

+45
-27
lines changed

docs/source/_toctree.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@
6565
title: Data Utilities
6666
- local: text_environments
6767
title: Text Environments
68+
- local: script_utils
69+
title: Script Utilities
6870
title: API
6971
- sections:
7072
- local: example_overview

docs/source/script_utils.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Script Utilities
2+
3+
## ScriptArguments
4+
5+
[[autodoc]] ScriptArguments
6+
7+
## TrlParser
8+
9+
[[autodoc]] TrlParser

trl/commands/cli_utils.py

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -151,26 +151,29 @@ class ChatArguments:
151151

152152

153153
class TrlParser(HfArgumentParser):
154+
"""
155+
The TRL parser parses a list of parsers (TrainingArguments, trl.ModelConfig, etc.), creates a config
156+
parser for users who pass a valid `config` field, and merges the values that are set in the config
157+
with the processed parsers.
158+
159+
Args:
160+
parsers (`List[argparse.ArgumentParser]`):
161+
List of parsers.
162+
ignore_extra_args (`bool`):
163+
Whether to ignore extra arguments passed by the config
164+
and not raise errors.
165+
"""
166+
154167
def __init__(self, parsers, ignore_extra_args=False):
155-
"""
156-
The TRL parser parses a list of parsers (TrainingArguments, trl.ModelConfig, etc.), creates a config
157-
parsers for users that pass a valid `config` field and merge the values that are set in the config
158-
with the processed parsers.
159-
160-
Args:
161-
parsers (`list[argparse.ArgumentParser`]):
162-
List of parsers.
163-
ignore_extra_args (`bool`):
164-
Whether to ignore extra arguments passed by the config
165-
and not raise errors.
166-
"""
167168
super().__init__(parsers)
168169
self.yaml_parser = YamlConfigParser()
169170
self.ignore_extra_args = ignore_extra_args
170171

171172
def post_process_dataclasses(self, dataclasses):
172-
# Apply additional post-processing in case some arguments needs a special
173-
# care
173+
"""
174+
Post-process dataclasses to merge the TrainingArguments with the SFTScriptArguments or DPOScriptArguments.
175+
"""
176+
174177
training_args = trl_args = None
175178
training_args_index = None
176179

@@ -192,6 +195,9 @@ def post_process_dataclasses(self, dataclasses):
192195
return dataclasses
193196

194197
def parse_args_and_config(self, return_remaining_strings=False):
198+
"""
199+
Parse the command line arguments and the config file.
200+
"""
195201
yaml_config = None
196202
if "--config" in sys.argv:
197203
config_index = sys.argv.index("--config")

trl/utils.py

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,19 +21,20 @@ class ScriptArguments:
2121
"""
2222
Arguments common to all scripts.
2323
24-
dataset_name (`str`):
25-
Dataset name.
26-
dataset_train_split (`str`, *optional*, defaults to `"train"`):
27-
Dataset split to use for training.
28-
dataset_test_split (`str`, *optional*, defaults to `"test"`):
29-
Dataset split to use for evaluation.
30-
config (`str` or `None`, *optional*, defaults to `None`):
31-
Path to the optional config file.
32-
gradient_checkpointing_use_reentrant (`bool`, *optional*, defaults to `False`):
33-
Whether to apply `use_reentrant` for gradient_checkpointing.
34-
ignore_bias_buffers (`bool`, *optional*, defaults to `False`):
35-
Debug argument for distributed training. Fix for DDP issues with LM bias/mask buffers - invalid scalar type,
36-
inplace operation. See https://github.com/huggingface/transformers/issues/22482#issuecomment-1595790992.
24+
Args:
25+
dataset_name (`str`):
26+
Dataset name.
27+
dataset_train_split (`str`, *optional*, defaults to `"train"`):
28+
Dataset split to use for training.
29+
dataset_test_split (`str`, *optional*, defaults to `"test"`):
30+
Dataset split to use for evaluation.
31+
config (`str` or `None`, *optional*, defaults to `None`):
32+
Path to the optional config file.
33+
gradient_checkpointing_use_reentrant (`bool`, *optional*, defaults to `False`):
34+
Whether to apply `use_reentrant` for gradient_checkpointing.
35+
ignore_bias_buffers (`bool`, *optional*, defaults to `False`):
36+
Debug argument for distributed training. Fix for DDP issues with LM bias/mask buffers - invalid scalar
37+
type, inplace operation. See https://github.com/huggingface/transformers/issues/22482#issuecomment-1595790992.
3738
"""
3839

3940
dataset_name: str

0 commit comments

Comments
 (0)