
Training script for StableLM 3B and 7B #72

@aamir-gmail

Description

Developed by Aamir Mirza.

Setup:

1. Create a conda virtual environment with Python 3.9.
2. Install PyTorch 1.13.1 (not 2.0):
   `conda install pytorch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1 pytorch-cuda=11.7 -c pytorch -c nvidia`
3. Install the latest transformers:
   `conda install -c conda-forge transformers`
4. Install DeepSpeed from GitHub (not via `pip install deepspeed`), built with CPU Adam optimiser support:
   `git clone https://github.com/microsoft/DeepSpeed`
   `cd DeepSpeed`
   `DS_BUILD_CPU_ADAM=1 pip install .`
5. Install accelerate via pip: `pip install accelerate`
6. Install Ninja: `pip install Ninja`
7. Install mpi4py: `conda install -c conda-forge mpi4py`

Train via the command line, for example:

`deepspeed train_gptNX_v2.py --num_gpus=2`

In my case I have 2x RTX 3090 (24 GB each).
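Before launching, it is worth sanity-checking that the environment matches the versions above; a minimal snippet (nothing in it is specific to this script):

```python
import torch
import transformers

print(torch.__version__)          # expect 1.13.1
print(torch.cuda.is_available())  # expect True
print(torch.cuda.device_count())  # expect 2 for a 2x 3090 setup
print(transformers.__version__)
```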

The training script (`train_gptNX_v2.py`). Imports and environment setup:

```python
from transformers import GPTNeoXForCausalLM, GPTNeoXTokenizerFast, DataCollatorWithPadding
from transformers import Trainer, TrainingArguments
from datasets import load_dataset
import os

os.environ['OMPI_MCA_opal_cuda_support'] = 'true'
os.environ['TOKENIZERS_PARALLELISM'] = 'false'

# If you have a single GPU, change this to "1".
os.environ["WORLD_SIZE"] = "2"

# Change this to your requirement, for example 4096 (the maximum).
MAX_LEN = 1024
```

The DeepSpeed ZeRO stage 2 configuration, embedded as a JSON string (the mixed-precision section is `fp16`, matching `fp16=True` in the training arguments below):

```python
stage2_config = """{
    "fp16": {
        "enabled": "auto",
        "loss_scale": 0,
        "loss_scale_window": 1000,
        "initial_scale_power": 16,
        "hysteresis": 2,
        "min_loss_scale": 1
    },

    "optimizer": {
        "type": "AdamW",
        "params": {
            "lr": "auto",
            "betas": "auto",
            "eps": "auto",
            "weight_decay": "auto"
        }
    },

    "scheduler": {
        "type": "WarmupLR",
        "params": {
            "warmup_min_lr": "auto",
            "warmup_max_lr": "auto",
            "warmup_num_steps": "auto"
        }
    },

    "zero_optimization": {
        "stage": 2,
        "offload_optimizer": {
            "device": "cpu",
            "pin_memory": true
        },
        "allgather_partitions": true,
        "allgather_bucket_size": 2e8,
        "overlap_comm": true,
        "reduce_scatter": true,
        "reduce_bucket_size": 2e8,
        "contiguous_gradients": true
    },

    "gradient_accumulation_steps": "auto",
    "gradient_clipping": "auto",
    "train_batch_size": "auto",
    "train_micro_batch_size_per_gpu": "auto"
}"""
```

A custom `Trainer` that feeds the input IDs back in as labels, so the model computes the causal language-modelling loss itself:

```python
class CustomTrainer(Trainer):
    def compute_loss(self, model_a, inputs_a, return_outputs=False):
        # The model shifts the labels internally to compute the
        # next-token prediction loss.
        outputs = model_a(**inputs_a, labels=inputs_a["input_ids"])
        loss = outputs.loss
        return (loss, outputs) if return_outputs else loss
```
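For context, passing `labels=input_ids` works because Hugging Face causal LM models shift the labels internally; a sketch of the equivalent manual computation (assuming a loaded model and one tokenized batch):

```python
import torch.nn.functional as F

def manual_causal_lm_loss(model, input_ids, attention_mask):
    # Predict token t+1 from tokens up to t, mirroring what the model
    # does when given labels=input_ids.
    logits = model(input_ids=input_ids, attention_mask=attention_mask).logits
    shift_logits = logits[:, :-1, :]
    shift_labels = input_ids[:, 1:]
    return F.cross_entropy(
        shift_logits.reshape(-1, shift_logits.size(-1)),
        shift_labels.reshape(-1),
    )
```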

Load the tokenizer and clean the raw text:

```python
tokenizer = GPTNeoXTokenizerFast.from_pretrained("stabilityai/stablelm-base-alpha-3b")

def process_data(examples):
    texts = examples["text"]
    # Remove empty lines
    texts = [text for text in texts if len(text) > 0 and not text.isspace()]
    # Remove lines that are too long
    texts = [text for text in texts if len(text) < 512]
    # Remove lines that are too short
    texts = [text for text in texts if len(text) > 16]
    # Append a newline character
    texts = [text + ' ' + '\n' for text in texts]
    examples["text"] = texts
    return examples
```
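A quick standalone check of the filter with hypothetical inputs, showing which lines survive:

```python
sample = {"text": ["", "   ", "too short", "x" * 600,
                   "This hypothetical review is long enough to keep."]}
print(process_data(sample)["text"])
# Only the last entry survives: non-empty, between 17 and 511
# characters, and returned with ' \n' appended.
```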

Process the dataset's `text` column with the tokenizer to get `input_ids` and an `attention_mask`:

```python
def process_data_add_mask(examples):
    text = examples['text']
    tokenizer.pad_token = tokenizer.eos_token
    # Tokenize text
    encoded_dict = tokenizer(
        text,
        padding=True,
        truncation=True,
        max_length=MAX_LEN
    )
    # Add input_ids and attention_mask to the example
    examples['input_ids'] = encoded_dict['input_ids']
    examples['attention_mask'] = encoded_dict['attention_mask']
    return examples
```
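A toy batch through this mapper (illustrative only; `padding=True` pads to the longest sequence in the batch, not to `MAX_LEN`):

```python
demo = process_data_add_mask({"text": ["hello world \n",
                                       "a somewhat longer line of text \n"]})
print(len(demo["input_ids"][0]), len(demo["input_ids"][1]))
# Both sequences come back the same length: the shorter one is padded
# with the EOS token, and its attention_mask marks the padding with 0s.
```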

Load the IMDB dataset and run both processing steps:

```python
imdb_dataset = load_dataset('imdb')

imdb_dataset_train = imdb_dataset['train'].shuffle()
imdb_dataset_train = imdb_dataset_train.map(process_data, batched=True, remove_columns=['label'])

imdb_dataset_val = imdb_dataset['test'].shuffle()
imdb_dataset_val = imdb_dataset_val.map(process_data, batched=True, remove_columns=['label'])

train_dataset = imdb_dataset_train.map(process_data_add_mask, remove_columns=["text"], batched=True)
val_dataset = imdb_dataset_val.map(process_data_add_mask, remove_columns=["text"], batched=True)
```
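A quick look at what the processed datasets contain (illustrative):

```python
print(train_dataset)  # features: input_ids, attention_mask
print(len(train_dataset[0]["input_ids"]))  # padded length, capped at MAX_LEN
```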

Load the model and set up the training arguments. An absolute path is required for the DeepSpeed config; you can use the JSON above to create your own config file.

```python
model = GPTNeoXForCausalLM.from_pretrained("stabilityai/stablelm-base-alpha-3b")

# Absolute path required for the DeepSpeed config.
z_optimiser = '/two-tb/train_GPTNX/zeromq_config/stablelm-base-alpha-3b_config.json'

data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="pt")

training_args_v2 = TrainingArguments(
    output_dir="./trained_model",
    learning_rate=2e-5,
    save_total_limit=2,
    fp16=True,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=12,
    evaluation_strategy="epoch",
    deepspeed=z_optimiser,
    num_train_epochs=1
)
```
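With `"train_batch_size": "auto"` in the config, DeepSpeed derives the global batch size from these settings; a worked check under the values above (`gradient_accumulation_steps` is left at its default of 1):

```python
per_device_train_batch_size = 1
world_size = 2                   # 2x 3090
gradient_accumulation_steps = 1  # TrainingArguments default
global_batch_size = per_device_train_batch_size * world_size * gradient_accumulation_steps
print(global_batch_size)  # 2
```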

Set up the trainer, train, and save:

```python
trainer = CustomTrainer(
    model=model,
    args=training_args_v2,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=data_collator,
    tokenizer=tokenizer,
)

trainer.train()
trainer.save_model()
```
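Afterwards, the saved model can be reloaded for a quick generation smoke test; a sketch, assuming the tokenizer was saved alongside the model in `output_dir` (the `Trainer` does this when given a tokenizer):

```python
from transformers import GPTNeoXForCausalLM, GPTNeoXTokenizerFast

model = GPTNeoXForCausalLM.from_pretrained("./trained_model")
tok = GPTNeoXTokenizerFast.from_pretrained("./trained_model")

ids = tok("The movie was", return_tensors="pt").input_ids
out = model.generate(ids, max_new_tokens=20)
print(tok.decode(out[0]))
```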

