6
6
import numpy as np
7
7
import pandas as pd
8
8
import random
9
- import tensorflow as tf
10
9
import torch
11
10
import torch .nn as nn
12
11
import torch .nn .functional as F
13
12
from torch .nn import CrossEntropyLoss
14
13
from torch .optim import Adam
15
14
import transformers
16
- from trl import SFTTrainer
17
15
from tqdm import tqdm
18
16
19
17
from utils import run_benchmark , make_spider_plot
25
23
26
24
# model_name = "facebook/opt-1.3b"
# Using the smallest OPT checkpoint here; swap in the commented-out 1.3b name
# above to experiment with a larger model.
model_name = "facebook/opt-125m"

# Load the causal-LM weights and the matching tokenizer from the Hugging Face hub.
# device_map="auto" lets transformers/accelerate place the model on available devices.
model = transformers.AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
31
28
# TEXT: some background on LLM benchmarking

# Load benchmark dataset and evaluate model.
# NOTE(review): "benchmark.csv" is read from the current working directory — confirm
# the expected column schema against run_benchmark in utils.
benchmark_dataset = pd.read_csv("benchmark.csv")
# category_accs_1300m, avg_acc_1300m = run_benchmark(model, tokenizer, benchmark_dataset)

# TEXT: ask them to make a prediction on how accuracy will be affected by different model sizes
# inspect current model
# print(model)

# Count trainable parameters in the output head only (before the LoRA swap below).
# NOTE(review): lm_head is presumably the final projection onto the vocabulary —
# confirm against the printed model structure.
layer = model.lm_head
print(layer.weight.shape)
print(sum(p.numel() for p in layer.parameters() if p.requires_grad))
98
97
99
98
# # freeze all parameter gradients
100
99
for param in model .parameters ():
# Swap eligible Linear layers in the model for LoRA-wrapped versions
# (replace_linear_with_lora is defined earlier in this file).
replace_linear_with_lora(model)

# Re-count trainable parameters in the output head after the LoRA swap,
# for comparison with the count printed before the swap.
layer = model.lm_head
print(sum(p.numel() for p in layer.parameters() if p.requires_grad))

# inspect new model
# print(model)
# Move the model to the GPU for training.
# NOTE(review): hard-codes "cuda" and will raise on a CPU-only machine; consider
# torch.cuda.is_available() — but verify the training loop below does not also
# hard-code "cuda" for its batches before changing this.
model = model.to("cuda")
for epoch in range (num_epochs ):
173
174
total_loss = 0
174
175
num_batches = 0
@@ -212,5 +213,5 @@ def replace_linear_with_lora(module):
212
213
213
214
# add to spider plot
# (disabled: requires the per-category accuracy dicts produced by run_benchmark,
# whose calls are currently commented out above)
# benchmark_data = {"350M-Model": category_accs_350m, "1300M-Model": category_accs_1300m, "1300M-Model-Finetuned": category_accs_1300m_ft, "2700M-Model": category_accs_2700m}
# benchmark_data = {"350M-Model": category_accs_1300m, "350M-Model-Finetuned": category_accs_1300m_ft}
# make_spider_plot(benchmark_data)
0 commit comments