import tensorflow as tf
import transformers
# numpy/pandas are used below for sampling and for loading the benchmark CSV;
# torch is used to build input tensors for the (PyTorch) Hugging Face model
import numpy as np
import pandas as pd
import torch

from utils import run_benchmark, make_spider_plot

# Part 1

# TEXT: overview of LLM lab
# Load pretrained LLM (medium size model)

model_name = "facebook/opt-1.3b"
# had to load non TF version to run benchmarking code
model = transformers.AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)


# TEXT: explain tokenizer
# Include cell for tokenizer inspection
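# A possible inspection cell (sketch; the sample sentence is arbitrary): encode a sentence
# into token ids, look at the corresponding token strings, then decode back to text.
sample_text = "Deep learning models are surprisingly good at writing text."
sample_ids = tokenizer.encode(sample_text)
print(sample_ids)                                   # integer token ids
print(tokenizer.convert_ids_to_tokens(sample_ids))  # the subword strings they map to
print(tokenizer.decode(sample_ids))                 # round trip back to text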
# TEXT: explain how LLMs are trained for next token prediction
# Write a function to predict next token
def predict_next_token(probs, tokenizer):
    # Sample a token id from the probability distribution over the vocabulary,
    # print the decoded token as it is generated, and return the sampled id
    new_token = np.random.choice(len(probs), p=probs.numpy())
    print(tokenizer.decode(new_token), end='', flush=True)
    return new_token
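# Sketch of a single prediction step (the prompt string is arbitrary): run one forward
# pass of the PyTorch model loaded above and sample exactly one next token.
prompt_ids = tokenizer.encode("The capital of France is")
with torch.no_grad():
    step_logits = model(torch.tensor([prompt_ids]).to(model.device)).logits
step_probs = tf.nn.softmax(step_logits[0, -1, :].float().cpu().numpy())
predict_next_token(step_probs, tokenizer)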
# TEXT: explain that next token prediction must be called multiple times for inference
# Call in loop for autoregressive inference
def generate(start_text, model, tokenizer, num_steps=20, temp=1.):
    print(start_text, end="")
    x = tokenizer.encode(start_text)
    num_start = len(x)

    # Autoregressive loop: feed the running token sequence back through the model,
    # sample one new token per step, and append it to the sequence
    for _ in range(num_steps):
        input_tensor = torch.tensor([x]).to(model.device)
        with torch.no_grad():
            logits = model(input_tensor).logits
        # the model is a PyTorch model, so convert its logits to numpy before
        # applying the TensorFlow softmax with temperature
        probs = tf.nn.softmax(logits.float().cpu().numpy() / temp)[0, -1, :]

        new_token = predict_next_token(probs, tokenizer)
        x.append(new_token)

    output = tokenizer.decode(x[num_start:])
    return output

# Test autoregressive generation
# while True:
#     print("\n\n\n\n\n")
#     input_text = input("Prompt: ")
#     output = generate(input_text, model, tokenizer)
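# Sketch of a non-interactive test (prompt and temperature values are arbitrary):
# lower temperature makes sampling more deterministic, higher makes it more random.
for t in [0.5, 1.0, 1.5]:
    print(f"\n--- temp={t} ---")
    generate("The most surprising thing about deep learning is", model, tokenizer, num_steps=20, temp=t)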
# TEXT: some background on LLM benchmarking
# Load benchmark dataset and evaluate model
dataset = pd.read_csv("benchmark.csv")
category_accs_1300m, avg_acc_1300m = run_benchmark(model, tokenizer, dataset)
# TEXT: ask them to make a prediction on how accuracy will be affected by different model sizes

# Benchmark smaller model
model_name_350m = "facebook/opt-350m"
model_350m = transformers.AutoModelForCausalLM.from_pretrained(model_name_350m, device_map="auto")
tokenizer_350m = transformers.AutoTokenizer.from_pretrained(model_name_350m)

category_accs_350m, avg_acc_350m = run_benchmark(model_350m, tokenizer_350m, dataset)

# Benchmark larger model
model_name_2700m = "facebook/opt-2.7b"
model_2700m = transformers.AutoModelForCausalLM.from_pretrained(model_name_2700m, device_map="auto")
tokenizer_2700m = transformers.AutoTokenizer.from_pretrained(model_name_2700m)

category_accs_2700m, avg_acc_2700m = run_benchmark(model_2700m, tokenizer_2700m, dataset)
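# Sketch: print the three average scores side by side before plotting
# (assumes run_benchmark returns the average accuracy as a plain float)
for size, avg in [("350M", avg_acc_350m), ("1.3B", avg_acc_1300m), ("2.7B", avg_acc_2700m)]:
    print(f"{size} model: average benchmark accuracy = {avg:.3f}")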
# Spider plot
benchmark_data = {"350M-Model": category_accs_350m, "1300M-Model": category_accs_1300m, "2700M-Model": category_accs_2700m}
make_spider_plot(benchmark_data)

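# make_spider_plot comes from utils; as a reference, below is a minimal sketch of one way
# such a radar ("spider") chart could be drawn with matplotlib, assuming each value in
# benchmark_data is a dict mapping category name -> accuracy (the real helper may differ).
import matplotlib.pyplot as plt

def sketch_spider_plot(data):
    categories = list(next(iter(data.values())).keys())
    angles = np.linspace(0, 2 * np.pi, len(categories), endpoint=False).tolist()
    angles += angles[:1]  # repeat the first angle to close each polygon
    ax = plt.subplot(polar=True)
    for label, accs in data.items():
        values = [accs[c] for c in categories]
        values += values[:1]
        ax.plot(angles, values, label=label)
        ax.fill(angles, values, alpha=0.1)
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(categories)
    ax.legend(loc="upper right")
    plt.show()
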
# Part 2