-
-
Notifications
You must be signed in to change notification settings - Fork 30
Expand file tree
/
Copy pathexample-binidx.yaml
More file actions
44 lines (39 loc) · 1.38 KB
/
example-binidx.yaml
File metadata and controls
44 lines (39 loc) · 1.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
###############################################
##
## Trainer settings are kept minimal
## with checkpoint / model saving disabled,
## as this tutorial focuses only on datasets
##
## It only runs 10 steps, enough to prove that
## the dataset configs are valid
##
## See the full `config-example.yaml` for more
## details on the trainer/model configs
##
###############################################
trainer:
  # Stop after 10 steps; that is enough to prove the dataset config is valid
  max_steps: 10
  # Reasonable batch size, for a more realistic it/s rate
  target_batch_size: 32
model:
  # Path of the model checkpoint (.pth) to load
  load_model: ../model/L6-D512-world-init.pth
  # NOTE(review): presumably the training context length in tokens - confirm
  ctx_len: 1024
  # NOTE(review): presumably the initial learning rate - confirm.
  # Written with an explicit decimal point so that YAML 1.1 loaders
  # (e.g. PyYAML) resolve it as a float instead of the string "3e-4"
  lr_init: 3.0e-4
########################################
## Training dataset settings
########################################
data:
  # Directory where the formatted HF dataset will be saved
  data_path: ../datapath/world/wiki40b/

  # Source points to the binidx file to use (without the .bin / .idx suffix)
  source: ../dataset/dataset-config/wiki40b_world_text_document

  # Tokenizer to use: either the inbuilt 'neox' or 'world' tokenizer,
  # or 'binidx' for the old binidx formats.
  # If using a custom tokenizer, provide the tokenizer file path.
  # ---
  tokenizer: binidx

  # After loading the dataset, split out test data used for validation.
  # This process is skipped if the dataset already includes a test split.
  # If set to 0, the test split normalizes to 1 data sample.
  test_split: 0
  test_split_shuffle: false