-
Notifications
You must be signed in to change notification settings - Fork 27
Expand file tree
/
Copy pathCoLLIE-7B_CodeLLaMA.yaml
More file actions
138 lines (129 loc) · 2.74 KB
/
CoLLIE-7B_CodeLLaMA.yaml
File metadata and controls
138 lines (129 loc) · 2.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# Model loading and training-mode arguments
model_name_or_path: 'HiTZ/GoLLIE-7B'
torch_dtype: 'bfloat16'
gradient_checkpointing: true
use_flash_attention: true
force_auto_device_map: false
# LoRA is disabled while `quantization` is set to 4.
# NOTE(review): confirm this pairing is intended; this file sets
# do_train: false and do_predict: true.
use_lora: false
quantization: 4
quantization_inference: null
# Dataset arguments
# The path is written on the key's own line: a continuation line that is
# not indented deeper than the key is not parsed as that key's value.
dataset_dir: /ikerlariak/osainz006/GoLLIE/data/processed_w_examples
# Tasks listed for training (14 entries).
train_tasks:
- ace05.eae
- ace05.ee
- ace05.ner
- ace05.rc
- ace05.re
- ace05.ver
- bc5cdr.ner
- conll03.ner
- diann.ner
- ncbidisease.ner
- ontonotes5.ner
- rams.eae
- tacred.sf
- wnut17.ner
# Tasks listed for validation (31 entries): every train task plus
# wikievents, e3c, broadtwitter, fabner, harveyner, multinerd, casie,
# mitmovie, mitrestaurant and the five crossner domains.
validation_tasks:
- ace05.eae
- ace05.ee
- ace05.ner
- ace05.rc
- ace05.re
- ace05.ver
- bc5cdr.ner
- conll03.ner
- diann.ner
- ncbidisease.ner
- ontonotes5.ner
- rams.eae
- tacred.sf
- wikievents.eae
- wikievents.ee
- wikievents.ner
- wnut17.ner
- e3c.ner
- broadtwitter.ner
- fabner.ner
- harveyner.ner
- multinerd.ner
- casie.eae
- casie.ee
- mitmovie.ner
- mitrestaurant.ner
- crossner.crossner_ai
- crossner.crossner_music
- crossner.crossner_politics
- crossner.crossner_literature
- crossner.crossner_natural_science
# Tasks listed for testing (12 entries); none of them appears in
# train_tasks.
test_tasks:
- crossner.crossner_ai
- crossner.crossner_music
- crossner.crossner_politics
- crossner.crossner_literature
- crossner.crossner_natural_science
- crossner_womisc.crossner_ai
- crossner_womisc.crossner_music
- crossner_womisc.crossner_politics
- crossner_womisc.crossner_literature
- crossner_womisc.crossner_natural_science
- mitmovie.ner
- mitrestaurant.ner
# Per-task example limits for each split.
# NOTE(review): null presumably means "no limit" for the test split;
# confirm against the loader.
max_examples_per_task_train: 30000
max_examples_per_task_val: 5000
max_examples_per_task_test: null
# Input and generation lengths are set to the same value.
max_seq_length: 2048
generation_max_length: 2048
ignore_pad_token_for_loss: true
prompt_loss_weight: 0.0
# Checkpoint settings
output_dir: '/ikerlariak/osainz006/models/GoLLIE/GoLLIE+-7b_CodeLLaMA'
overwrite_output_dir: true
save_strategy: 'epoch'
# NOTE(review): save_steps is presumably unused while save_strategy is
# 'epoch'; confirm against the trainer.
save_steps: 1000
save_total_limit: 999
load_best_model_at_end: false
# Run modes: only prediction is enabled (training and evaluation are off)
do_predict: true
do_train: false
do_eval: false
predict_with_generate: true
evaluate_all_checkpoints: false
# Evaluation schedule
evaluation_strategy: 'steps'
eval_steps: 500
eval_delay: 0
# Batch sizes (per device) and generation beams
per_device_train_batch_size: 32
gradient_accumulation_steps: 1
per_device_eval_batch_size: 8
generation_num_beams: 1  # single beam
# Optimizer settings
optim: adamw_torch_fused
learning_rate: 0.0003
weight_decay: 0.0
num_train_epochs: 3
lr_scheduler_type: cosine
warmup_ratio: 0.03
# Written with a decimal point so that YAML 1.1 loaders (e.g. PyYAML)
# resolve it as a float; a bare `1e-7` is loaded there as the string
# "1e-7" rather than a number.
adam_epsilon: 1.0e-7
# LoRA settings
# NOTE(review): use_lora is false in this file, so these values are
# presumably inactive for this run; confirm against the consumer.
lora_r: 8
lora_alpha: 16
lora_dropout: 0.05
lora_target_modules: [all]
# Logging and reporting
report_to: 'wandb'
run_name: 'GoLLIE+-7b_CodeLLaMA'
logging_strategy: 'steps'
logging_steps: 25
logging_first_step: true
disable_tqdm: false
# Hub upload and checkpoint resumption (both set to false)
resume_from_checkpoint: false
push_to_hub: false
# Numeric precision and performance toggles
# bf16 is on and fp16 is off, in line with `torch_dtype: bfloat16`
# set elsewhere in this file.
fp16: false
bf16: true
ddp_find_unused_parameters: false
torch_compile: false