
Commit 6798c81 (parent: 0f21304)

Commit message: update

1 file changed
scripts/benchmarking/benchmarking_linux.sh

Lines changed: 79 additions & 15 deletions
@@ -1,11 +1,18 @@
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# Customize what is being run
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-RUN_CUDA_EAGER=true
-RUN_CUDA_COMPILE=false
-RUN_CUDA_AOTI=false
+DRY_RUN=0
 
-RUN_CPU_EAGER=true
-RUN_CPU_COMPILE=false
-RUN_CPU_AOTI=false
+RUN_CUDA_EAGER=1
+RUN_CUDA_COMPILE=1
+RUN_CUDA_AOTI=1
+RUN_CUDA_AOTI_PT2=1
+
+RUN_CPU_EAGER=1
+RUN_CPU_COMPILE=1
+RUN_CPU_AOTI=1
+RUN_CPU_AOTI_PT2=1
 
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # Check and Set Up Args (model, out_directory)
@@ -25,10 +32,13 @@ mkdir -p $dir
 # Helpers
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
+# Env Variables for Running Commands
+ENV_VARIABLE="OMP_NUM_THREADS=16 numactl --cpunodebind=0 --membind=0"
+
 # Function for printing and writing to files
 function formatted_export_and_generate {
     local file="$dir/$1"
-    local generate_cmd="$2"
+    local generate_cmd="${ENV_VARIABLE} $2"
     local compile_cmd="$3"
 
     # Write Commands to the top of the output file
@@ -41,13 +51,17 @@ function formatted_export_and_generate {
     if [ ! -z "$compile_cmd" ]; then
         echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" >> $file
         echo "$compile_cmd" | tee -a $file
-        eval $compile_cmd &>> $file
+        if [ $DRY_RUN -eq 0 ]; then
+            eval $compile_cmd &>> $file
+        fi
     fi
 
     # Generate using the Model
     echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" >> $file
     echo $generate_cmd | tee -a $file
-    eval $generate_cmd &>> $file
+    if [ $DRY_RUN -eq 0 ]; then
+        eval $generate_cmd &>> $file
+    fi
     echo
 }
 
@@ -56,7 +70,7 @@ function formatted_export_and_generate {
 # Cuda eager
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-if [ "$RUN_CUDA_EAGER" = "true" ]; then
+if [ $RUN_CUDA_EAGER -eq 1 ]; then
     echo "Cuda eager b16"
     generate_cmd="python3 torchchat.py generate $model --quantize '{\"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cuda\"}}' --prompt \"Once upon a time,\" --max-new-tokens 200 --num-samples 3"
     file="cuda_eager_b16.txt"
@@ -78,7 +92,7 @@ fi
 # Cuda compile
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-if [ "$RUN_CUDA_COMPILE" = "true" ]; then
+if [ $RUN_CUDA_COMPILE -eq 1 ]; then
     echo "Cuda compile b16"
     generate_cmd="python3 torchchat.py generate $model --quantize '{\"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cuda\"}}' --prompt \"Once upon a time,\" --max-new-tokens 200 --compile --num-samples 3"
     file="cuda_compile_b16.txt"
@@ -100,7 +114,7 @@ fi
 # CPU eager
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-if [ "$RUN_CPU_EAGER" = "true" ]; then
+if [ $RUN_CPU_EAGER -eq 1 ]; then
     echo "CPU eager b16"
     generate_cmd="python3 torchchat.py generate $model --quantize '{\"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cpu\"}}' --prompt \"Once upon a time,\" --max-new-tokens 256 --num-samples 3"
     file="cpu_eager_b16.txt"
@@ -122,7 +136,7 @@ fi
 # CPU compile
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-if [ "$RUN_CPU_COMPILE" = "true" ]; then
+if [ $RUN_CPU_COMPILE -eq 1 ]; then
     echo "CPU compile b16"
     generate_cmd="python3 torchchat.py generate $model --quantize '{\"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cpu\"}}' --prompt \"Once upon a time,\" --max-new-tokens 256 --compile --num-samples 3"
     file="cpu_compile_b16.txt"
@@ -144,7 +158,7 @@ fi
 # Cuda AOTI
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-if [ "$RUN_CUDA_AOTI" = "true" ]; then
+if [ $RUN_CUDA_AOTI -eq 1 ]; then
    echo "Cuda aoti b16"
    compile_cmd="python3 torchchat.py export $model --quantize '{\"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cuda\"}}' --output-dso-path /tmp/model16.so"
    generate_cmd="python3 torchchat.py generate $model --dso-path /tmp/model16.so --prompt \"Once upon a time,\" --max-new-tokens 200 --device cuda --num-samples 3"
@@ -165,11 +179,36 @@ if [ "$RUN_CUDA_AOTI" = "true" ]; then
 fi
 
 
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# Cuda AOTI PT2
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+if [ $RUN_CUDA_AOTI_PT2 -eq 1 ]; then
+    echo "Cuda aoti PT2 b16"
+    compile_cmd="python3 torchchat.py export $model --quantize '{\"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cuda\"}}' --output-aoti-package-path /tmp/model16.pt2"
+    generate_cmd="python3 torchchat.py generate $model --aoti-package-path /tmp/model16.pt2 --prompt \"Once upon a time,\" --max-new-tokens 200 --device cuda --num-samples 3"
+    file="cuda_aoti_pt2_b16.txt"
+    formatted_export_and_generate "$file" "$generate_cmd" "$compile_cmd"
+
+    echo "Cuda aoti PT2 int8"
+    compile_cmd="python3 torchchat.py export $model --quantize '{\"linear:int8\": {\"groupsize\": 0}, \"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cuda\"}}' --output-aoti-package-path /tmp/model8.pt2"
+    generate_cmd="python3 torchchat.py generate $model --aoti-package-path /tmp/model8.pt2 --prompt \"Once upon a time,\" --max-new-tokens 200 --device cuda --num-samples 3"
+    file="cuda_aoti_pt2_8.txt"
+    formatted_export_and_generate "$file" "$generate_cmd" "$compile_cmd"
+
+    echo "Cuda aoti PT2 int4"
+    compile_cmd="python3 torchchat.py export $model --quantize '{\"linear:int4\": {\"groupsize\": 256}, \"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cuda\"}}' --output-aoti-package-path /tmp/model34.pt2"
+    generate_cmd="python3 torchchat.py generate $model --aoti-package-path /tmp/model34.pt2 --prompt \"Once upon a time,\" --max-new-tokens 200 --device cuda --num-samples 3"
+    file="cuda_aoti_pt2_4.txt"
+    formatted_export_and_generate "$file" "$generate_cmd" "$compile_cmd"
+fi
+
+
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # CPU AOTI
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-if [ "$RUN_CPU_AOTI" = "true" ]; then
+if [ $RUN_CPU_AOTI -eq 1 ]; then
     echo "CPU aoti b16"
     compile_cmd="python3 torchchat.py export $model --quantize '{\"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cpu\"}}' --output-dso-path /tmp/model16.so"
     generate_cmd="python3 torchchat.py generate $model --dso-path /tmp/model16.so --prompt \"Once upon a time,\" --max-new-tokens 256 --device cpu --num-samples 3"
@@ -188,3 +227,28 @@ if [ "$RUN_CPU_AOTI" = "true" ]; then
     file="cpu_aoti_4.txt"
     formatted_export_and_generate "$file" "$generate_cmd" "$compile_cmd"
 fi
+
+
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# CPU AOTI PT2
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+if [ $RUN_CPU_AOTI_PT2 -eq 1 ]; then
+    echo "CPU aoti PT2 b16"
+    compile_cmd="python3 torchchat.py export $model --quantize '{\"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cpu\"}}' --output-pt2-package-path /tmp/model16.pt2"
+    generate_cmd="python3 torchchat.py generate $model --pt2-package-path /tmp/model16.pt2 --prompt \"Once upon a time,\" --max-new-tokens 256 --device cpu --num-samples 3"
+    file="cpu_aoti_b16.txt"
+    formatted_export_and_generate "$file" "$generate_cmd" "$compile_cmd"
+
+    echo "CPU aoti PT2 int8"
+    compile_cmd="python3 torchchat.py export $model --quantize '{\"linear:int8\": {\"groupsize\": 0}, \"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cpu\"}}' --output-pt2-package-path /tmp/model8.pt2"
+    generate_cmd="python3 torchchat.py generate $model --pt2-package-path /tmp/model8.pt2 --prompt \"Once upon a time,\" --max-new-tokens 256 --device cpu --num-samples 3"
+    file="cpu_aoti_8.txt"
+    formatted_export_and_generate "$file" "$generate_cmd" "$compile_cmd"
+
+    echo "CPU aoti PT2 int4"
+    compile_cmd="python3 torchchat.py export $model --quantize '{\"linear:int4\": {\"groupsize\": 256}, \"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cpu\"}}' --output-pt2-package-path /tmp/model34.pt2"
+    generate_cmd="python3 torchchat.py generate $model --pt2-package-path /tmp/model34.pt2 --prompt \"Once upon a time,\" --max-new-tokens 256 --device cpu --num-samples 3"
+    file="cpu_aoti_4.txt"
+    formatted_export_and_generate "$file" "$generate_cmd" "$compile_cmd"
+fi
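
A note for readers adapting this script: the new DRY_RUN flag gates every eval, so with DRY_RUN=1 each compile and generate command is still echoed and appended to the per-run output file, but nothing is executed. A minimal standalone sketch of that gating pattern (the helper name run_cmd and the log path are illustrative, not part of the script):

#!/usr/bin/env bash
# Sketch of the DRY_RUN gating pattern used in benchmarking_linux.sh.
DRY_RUN=1                      # 1 = only print/log commands; 0 = actually run them
file="/tmp/bench_demo.txt"     # hypothetical log file

function run_cmd {
    local cmd="$1"
    echo "$cmd" | tee -a "$file"     # always record the command
    if [ $DRY_RUN -eq 0 ]; then
        eval $cmd &>> "$file"        # run it, appending stdout and stderr
    fi
}

run_cmd "echo hello"   # with DRY_RUN=1 this only logs the command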

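The ENV_VARIABLE prefix works because each generate_cmd is built as a string and later eval'd, so the OMP_NUM_THREADS assignment and the numactl CPU/memory-node pinning apply to every generate invocation. A hedged sketch of that mechanism (assumes numactl is installed; the python3 one-liner is a placeholder, not a torchchat command):

#!/usr/bin/env bash
# Sketch: prefixing env settings and numactl onto an eval'd command string.
ENV_VARIABLE="OMP_NUM_THREADS=16 numactl --cpunodebind=0 --membind=0"
cmd="${ENV_VARIABLE} python3 -c 'import os; print(os.environ[\"OMP_NUM_THREADS\"])'"
eval $cmd    # prints 16; the process runs pinned to NUMA node 0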
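
Finally, an illustrative invocation. The "Check and Set Up Args (model, out_directory)" section referenced in the first hunk suggests the script takes a model and an output directory as positional arguments; the model name and output path below are assumptions, not taken from this diff:

# Assumed usage: benchmark a model, writing one .txt file per configuration into ./bench_results
bash scripts/benchmarking/benchmarking_linux.sh llama3.1 ./bench_results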