Skip to content

Commit ebd4ac1

Browse files
author
Felipe Mello
committed
first commit
1 parent 801a454 commit ebd4ac1

File tree

10 files changed

+677
-463
lines changed

10 files changed

+677
-463
lines changed

apps/sft/eval_utils.py

Lines changed: 0 additions & 351 deletions
This file was deleted.

apps/sft/llama3_8b.yaml

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -26,32 +26,30 @@ optimizer:
2626
lr_scheduler:
2727
warmup_steps: 200
2828

29-
# Unified dataset configuration
30-
# First dataset with split='train' is used for training
31-
dataset_val:
32-
datasets:
33-
- name: "train"
34-
path: "yahma/alpaca-cleaned"
35-
split: "train[:95%]"
36-
37-
- name: "val"
38-
path: "yahma/alpaca-cleaned"
39-
split: "train[95%:]"
40-
4129
training:
4230
local_batch_size: 1
4331
seq_len: 2048
4432
max_norm: 1.0
4533
steps: 1000
4634
compile: false
47-
dataset: "c4"
35+
datasets:
36+
- path: "yahma/alpaca-cleaned"
37+
split: "train[:95%]"
38+
39+
eval:
40+
eval_every_n_steps: 5 # Run evaluation every N training steps (null = disabled)
41+
max_eval_steps: 0 # Max batches per eval dataset (null = run until epoch completes)
42+
batch_size: ${training.local_batch_size} # Batch size for evaluation
43+
datasets:
44+
- path: "yahma/alpaca-cleaned"
45+
split: "train[95%:]"
4846

4947
parallelism:
5048
data_parallel_replicate_degree: 1
5149
data_parallel_shard_degree: -1
52-
tensor_parallel_degree: 1
50+
tensor_parallel_degree: 2
5351
pipeline_parallel_degree: 1
54-
context_parallel_degree: 1
52+
context_parallel_degree: 2
5553
expert_parallel_degree: 1
5654
disable_loss_parallel: false
5755

0 commit comments

Comments
 (0)