-
Notifications
You must be signed in to change notification settings - Fork 345
Expand file tree
/
Copy pathrl_basic.py
More file actions
42 lines (34 loc) · 1.2 KB
/
rl_basic.py
File metadata and controls
42 lines (34 loc) · 1.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import asyncio
import chz
import sys
from tinker_cookbook import cli_utils, model_info
from tinker_cookbook.recipes.math_rl.math_env import Gsm8kDatasetBuilder
from tinker_cookbook.rl import train
def build_config_blueprint() -> chz.Blueprint[train.Config]:
    """Return a ``train.Config`` blueprint pre-filled with this example's defaults.

    The blueprint targets the GSM8K dataset builder with the
    ``meta-llama/Llama-3.1-8B`` model. Nothing is constructed here beyond the
    dataset builder; callers turn the blueprint into a config themselves.

    Returns:
        A ``chz.Blueprint`` for ``train.Config`` with the default values applied.
    """
    base_model = "meta-llama/Llama-3.1-8B"
    # The renderer is looked up from the model name rather than hard-coded.
    renderer = model_info.get_recommended_renderer_name(base_model)

    # The same model name is also passed as the tokenizer source.
    gsm8k_builder = Gsm8kDatasetBuilder(
        batch_size=128,
        group_size=16,
        renderer_name=renderer,
        model_name_for_tokenizer=base_model,
    )

    overrides = {
        "model_name": base_model,
        "renderer_name": renderer,
        "log_path": "/tmp/tinker-examples/rl_basic",
        "dataset_builder": gsm8k_builder,
        "learning_rate": 4e-5,
        "max_tokens": 256,
        # NOTE(review): 0 presumably turns periodic evaluation off -- confirm
        # against train.Config.
        "eval_every": 0,
    }

    blueprint = chz.Blueprint(train.Config)
    return blueprint.apply(overrides)
def main(config: train.Config):
    """Check the log directory for ``config``, then run training to completion.

    Args:
        config: Training configuration; its ``log_path`` is checked before
            training starts.
    """
    # Protect the logs of an earlier run from being overwritten.
    # NOTE(review): "ask" presumably prompts the user when the directory
    # already exists -- confirm in cli_utils.check_log_dir.
    log_dir = config.log_path
    cli_utils.check_log_dir(log_dir, behavior_if_exists="ask")

    # train.main(config) yields an awaitable; asyncio.run drives it until done.
    training = train.main(config)
    asyncio.run(training)
if __name__ == "__main__":
    # Start from the example defaults defined in build_config_blueprint().
    blueprint = build_config_blueprint()
    # make_from_argv layers the command-line overrides onto the blueprint and
    # returns the constructed config, so no separate make() call is needed and
    # the config is built exactly once.
    config = blueprint.make_from_argv(sys.argv[1:])
    main(config)