
Commit 80b4edb

Post release fixes (axolotl-ai-cloud#2581)
* fix missing kwarg on child
* make the runpod test shorter
* update docs
* rename runpod test json file
* typing fixes and ordering of doc
1 parent fedbcc0 commit 80b4edb

File tree

4 files changed: +27 -20 lines changed

.runpod/tests.json renamed to .runpod/test-input.json

Lines changed: 9 additions & 8 deletions
@@ -12,22 +12,22 @@
     "base_model": "HuggingFaceTB/SmolLM2-135M",
     "model_type": "AutoModelForCausalLM",
     "tokenizer_type": "AutoTokenizer",
-    "load_in_8bit": true,
-    "load_in_4bit": false,
+    "load_in_4bit": true,
     "strict": false,
     "datasets": [
       {
         "path": "mhenrichsen/alpaca_2k_test",
-        "type": "alpaca"
+        "type": "alpaca",
+        "split": "train[:10%]"
       }
     ],
-    "val_set_size": 0.05,
+    "val_set_size": 0.02,
     "output_dir": "./outputs/lora-out",
     "sequence_len": 4096,
     "sample_packing": true,
     "eval_sample_packing": false,
     "pad_to_sequence_len": true,
-    "adapter": "lora",
+    "adapter": "qlora",
     "lora_r": 32,
     "lora_alpha": 64,
     "lora_dropout": 0.05,
@@ -36,8 +36,8 @@
       "embed_tokens",
       "lm_head"
     ],
-    "gradient_accumulation_steps": 4,
-    "micro_batch_size": 2,
+    "gradient_accumulation_steps": 2,
+    "micro_batch_size": 1,
     "num_epochs": 1,
     "optimizer": "adamw_torch_fused",
     "lr_scheduler": "cosine",
@@ -56,7 +56,8 @@
     "weight_decay": 0.0,
     "special_tokens": {
       "pad_token": "<|endoftext|>"
-    }
+    },
+    "max_steps": 20
   },
   "timeout": 100000
 },
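Taken together, these config changes are what makes the RunPod test shorter: the adapter switches from 8-bit LoRA to 4-bit QLoRA, the dataset is cut to the first 10% of the 2k-example alpaca split, evaluation uses 2% instead of 5%, and the run is capped at 20 optimizer steps. A rough sanity check of the step budget, as a hedged Python sketch (assumes a single GPU and roughly 2,000 dataset rows; sample packing will shrink the true step count further, so max_steps is the binding limit):

    # Back-of-the-envelope step math for the shortened smoke test above.
    dataset_rows = 2000                                   # mhenrichsen/alpaca_2k_test
    train_rows = int(dataset_rows * 0.10 * (1 - 0.02))    # train[:10%] split, 2% held out for eval -> 196
    effective_batch = 1 * 2                               # micro_batch_size * gradient_accumulation_steps
    steps_per_epoch = train_rows // effective_batch       # ~98 optimizer steps for a full epoch
    steps_actually_run = min(steps_per_epoch, 20)         # max_steps caps the test at 20 steps
    print(train_rows, effective_batch, steps_per_epoch, steps_actually_run)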

docs/config.qmd

Lines changed: 4 additions & 0 deletions
@@ -184,6 +184,10 @@ datasets:
 # adding a system turn with empty content.
 drop_system_message:

+# Optional[bool]. Whether to split the assistant turn based on a reasoning trace inside delimited tags
+# defaults to False
+split_thinking:
+
 # IMPORTANT: The following fields determine which parts of the conversation to train on.
 # Priority order: message_field_training > message_field_training_detail > train_on_inputs or role in roles_to_train
 # See examples at `docs/dataset-formats/conversation.qmd`
src/axolotl/integrations/kd/chat_template.py

Lines changed: 2 additions & 0 deletions
@@ -37,6 +37,7 @@ def __init__(
         train_on_eos=None,
         train_on_eot=None,
         eot_tokens=None,
+        split_thinking: bool | None = False,
         logprobs_field="logprobs",
         gen_temperature=1.0,
         kd_temperature=1.0,
@@ -54,6 +55,7 @@ def __init__(
             train_on_eos=train_on_eos,
             train_on_eot=train_on_eot,
             eot_tokens=eot_tokens,
+            split_thinking=split_thinking,
         )

     @property
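This is the "fix missing kwarg on child" item from the commit message: the knowledge-distillation prompt strategy now both accepts split_thinking and forwards it to its parent class. A stripped-down sketch of the bug pattern, using simplified hypothetical class names:

    class BaseStrategy:
        def __init__(self, split_thinking: bool | None = False):
            self.split_thinking = split_thinking

    class KDStrategy(BaseStrategy):
        def __init__(self, split_thinking: bool | None = False, logprobs_field: str = "logprobs"):
            # Before the fix, the child accepted the kwarg but never passed it up,
            # so the parent silently fell back to its own default.
            super().__init__(split_thinking=split_thinking)
            self.logprobs_field = logprobs_field

    # KDStrategy(split_thinking=True).split_thinking -> True (stayed False before the forwarding fix)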

src/axolotl/prompt_strategies/chat_template.py

Lines changed: 12 additions & 12 deletions
@@ -4,7 +4,7 @@

 import logging
 from collections import defaultdict
-from typing import Any, Dict, List, Optional, Set, Union
+from typing import Any, Dict, List, Set, Union

 from pydantic import BaseModel
 from transformers import ProcessorMixin
@@ -29,12 +29,12 @@ def __init__(
         chat_template: str,
         processor=None,
         max_length=2048,
-        message_property_mappings: Optional[Dict[str, str]] = None,
-        message_field_training: Optional[str] = None,
-        message_field_training_detail: Optional[str] = None,
+        message_property_mappings: Dict[str, str] | None = None,
+        message_field_training: str | None = None,
+        message_field_training_detail: str | None = None,
         field_messages: str = "messages",
         field_system: str = "system",
-        roles: Optional[Dict[str, List[str]]] = None,
+        roles: Dict[str, List[str]] | None = None,
         drop_system_message: bool = False,
     ):
         # check if message_property_mappings is None or empty dict
@@ -65,7 +65,7 @@ def __init__(
         self.field_messages = field_messages
         self.field_system = field_system
         self.tokenizer = tokenizer
-        self.processor: Optional[ProcessorMixin] = processor
+        self.processor: ProcessorMixin | None = processor
         self.chat_template = chat_template
         self.max_length = max_length
         self.drop_system_message = drop_system_message
@@ -224,11 +224,11 @@ def __init__(
         tokenizer,
         train_on_inputs: bool,
         sequence_len: int,
-        roles_to_train: Optional[List[str]] = None,
-        train_on_eos: Optional[str] = None,
-        train_on_eot: Optional[str] = None,
-        eot_tokens: Optional[List[str]] = None,
-        split_thinking: Optional[bool] = False,
+        roles_to_train: list[str] | None = None,
+        train_on_eos: str | None = None,
+        train_on_eot: str | None = None,
+        eot_tokens: list[str] | None = None,
+        split_thinking: bool | None = False,
     ):
         super().__init__(prompter, tokenizer, train_on_inputs, sequence_len)
         self.prompter: ChatTemplatePrompter = prompter
@@ -714,7 +714,7 @@ def __call__(
         self,
         tokenizer,
         cfg,
-        ds_cfg: Optional[Union[Dict[str, Any], DatasetConfig]] = None,
+        ds_cfg: Union[Dict[str, Any], DatasetConfig] | None = None,
         processor=None,
     ):
         if ds_cfg is None:
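The rest of this file is the typing cleanup mentioned in the commit message: Optional[X] annotations are rewritten in the PEP 604 X | None style. Both spellings denote the same union type; a quick check (note that X | None in evaluated annotations needs Python 3.10+, or from __future__ import annotations on older interpreters):

    from typing import Optional, get_args

    # Both annotations describe the union of str and None.
    print(get_args(Optional[str]))   # (<class 'str'>, <class 'NoneType'>)
    print(get_args(str | None))      # (<class 'str'>, <class 'NoneType'>)

    def greet(name: str | None = None) -> str:
        # equivalent to name: Optional[str] = None
        return f"hello {name or 'world'}"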
