Skip to content

Commit a2ad0fc

Browse files
committed
small changes
1 parent afffcb7 commit a2ad0fc

File tree

1 file changed

+9
-8
lines changed

1 file changed

+9
-8
lines changed

xtra_labs/llm_finetune/draft.py

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -6,14 +6,12 @@
66
import numpy as np
77
import pandas as pd
88
import random
9-
import tensorflow as tf
109
import torch
1110
import torch.nn as nn
1211
import torch.nn.functional as F
1312
from torch.nn import CrossEntropyLoss
1413
from torch.optim import Adam
1514
import transformers
16-
from trl import SFTTrainer
1715
from tqdm import tqdm
1816

1917
from utils import run_benchmark, make_spider_plot
@@ -25,7 +23,6 @@
2523

2624
# model_name = "facebook/opt-1.3b"
2725
model_name = "facebook/opt-125m"
28-
# had to load non TF version to run benchmarking code
2926
model = transformers.AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
3027
tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
3128

@@ -66,7 +63,7 @@ def generate(start_text, model, tokenizer, num_steps=20, temp=1.):
6663
# TEXT: some background on LLM benchmarking
6764
# Load benchmark dataset and evaluate model
6865
benchmark_dataset = pd.read_csv("benchmark.csv")
69-
category_accs_1300m, avg_acc_1300m = run_benchmark(model, tokenizer, benchmark_dataset)
66+
# category_accs_1300m, avg_acc_1300m = run_benchmark(model, tokenizer, benchmark_dataset)
7067

7168
# TEXT: ask them to make a prediction on how accuracy will be affected by different model sizes
7269

@@ -94,7 +91,9 @@ def generate(start_text, model, tokenizer, num_steps=20, temp=1.):
9491

9592
# inspect current model
9693
# print(model)
97-
print(sum(p.numel() for p in model.parameters() if p.requires_grad))
94+
layer = model.lm_head
95+
print(layer.weight.shape)
96+
print(sum(p.numel() for p in layer.parameters() if p.requires_grad))
9897

9998
# # freeze all parameter gradients
10099
for param in model.parameters():
@@ -150,7 +149,8 @@ def replace_linear_with_lora(module):
150149

151150
replace_linear_with_lora(model)
152151

153-
print(sum(p.numel() for p in model.parameters() if p.requires_grad))
152+
layer = model.lm_head
153+
print(sum(p.numel() for p in layer.parameters() if p.requires_grad))
154154

155155
# inspect new model
156156
# print(model)
@@ -169,6 +169,7 @@ def replace_linear_with_lora(module):
169169

170170
model = model.to("cuda")
171171

172+
172173
for epoch in range(num_epochs):
173174
total_loss = 0
174175
num_batches = 0
@@ -212,5 +213,5 @@ def replace_linear_with_lora(module):
212213

213214
# add to spider plot
214215
# benchmark_data = {"350M-Model": category_accs_350m, "1300M-Model": category_accs_1300m, "1300M-Model-Finetuned": category_accs_1300m_ft, "2700M-Model": category_accs_2700m}
215-
benchmark_data = {"350M-Model": category_accs_1300m, "350M-Model-Finetuned": category_accs_1300m_ft}
216-
make_spider_plot(benchmark_data)
216+
# benchmark_data = {"350M-Model": category_accs_1300m, "350M-Model-Finetuned": category_accs_1300m_ft}
217+
# make_spider_plot(benchmark_data)

0 commit comments

Comments
 (0)