+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# Customize what is being run
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-RUN_CUDA_EAGER=true
-RUN_CUDA_COMPILE=false
-RUN_CUDA_AOTI=false
+DRY_RUN=0
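+# Set DRY_RUN=1 to have the script print and log each command without executing it.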
 
-RUN_CPU_EAGER=true
-RUN_CPU_COMPILE=false
-RUN_CPU_AOTI=false
+RUN_CUDA_EAGER=1
+RUN_CUDA_COMPILE=1
+RUN_CUDA_AOTI=1
+RUN_CUDA_AOTI_PT2=1
+
+RUN_CPU_EAGER=1
+RUN_CPU_COMPILE=1
+RUN_CPU_AOTI=1
+RUN_CPU_AOTI_PT2=1
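+# Each RUN_* flag set to 1 enables the matching benchmark section below; 0 skips it.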
 
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # Check and Set Up Args (model, out_directory)
@@ -25,10 +32,13 @@ mkdir -p $dir
 # Helpers
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
+# Env Variables for Running Commands
+ENV_VARIABLE="OMP_NUM_THREADS=16 numactl --cpunodebind=0 --membind=0"
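+# numactl binds execution and memory to NUMA node 0 and OMP_NUM_THREADS caps the
+# thread count, so CPU runs get a consistent core and memory placement.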
+
 # Function for printing and writing to files
 function formatted_export_and_generate {
     local file="$dir/$1"
-    local generate_cmd="$2"
+    local generate_cmd="${ENV_VARIABLE} $2"
     local compile_cmd="$3"
 
     # Write Commands to the top of the output file
@@ -41,13 +51,17 @@ function formatted_export_and_generate {
     if [ ! -z "$compile_cmd" ]; then
         echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" >> $file
         echo "$compile_cmd" | tee -a $file
-        eval $compile_cmd &>> $file
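+        # Guard execution for dry runs; &>> appends both stdout and stderr to the log file.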
+        if [ $DRY_RUN -eq 0 ]; then
+            eval $compile_cmd &>> $file
+        fi
     fi
 
     # Generate using the Model
     echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" >> $file
     echo $generate_cmd | tee -a $file
-    eval $generate_cmd &>> $file
+    if [ $DRY_RUN -eq 0 ]; then
+        eval $generate_cmd &>> $file
+    fi
     echo
 }
 
@@ -56,7 +70,7 @@ function formatted_export_and_generate {
 # Cuda eager
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-if [ "$RUN_CUDA_EAGER" = "true" ]; then
+if [ $RUN_CUDA_EAGER -eq 1 ]; then
     echo "Cuda eager b16"
     generate_cmd="python3 torchchat.py generate $model --quantize '{\"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cuda\"}}' --prompt \"Once upon a time,\" --max-new-tokens 200 --num-samples 3"
     file="cuda_eager_b16.txt"
@@ -78,7 +92,7 @@
 # Cuda compile
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-if [ "$RUN_CUDA_COMPILE" = "true" ]; then
+if [ $RUN_CUDA_COMPILE -eq 1 ]; then
     echo "Cuda compile b16"
     generate_cmd="python3 torchchat.py generate $model --quantize '{\"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cuda\"}}' --prompt \"Once upon a time,\" --max-new-tokens 200 --compile --num-samples 3"
     file="cuda_compile_b16.txt"
@@ -100,7 +114,7 @@
 # CPU eager
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-if [ "$RUN_CPU_EAGER" = "true" ]; then
+if [ $RUN_CPU_EAGER -eq 1 ]; then
     echo "CPU eager b16"
     generate_cmd="python3 torchchat.py generate $model --quantize '{\"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cpu\"}}' --prompt \"Once upon a time,\" --max-new-tokens 256 --num-samples 3"
     file="cpu_eager_b16.txt"
@@ -122,7 +136,7 @@
 # CPU compile
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-if [ "$RUN_CPU_COMPILE" = "true" ]; then
+if [ $RUN_CPU_COMPILE -eq 1 ]; then
     echo "CPU compile b16"
     generate_cmd="python3 torchchat.py generate $model --quantize '{\"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cpu\"}}' --prompt \"Once upon a time,\" --max-new-tokens 256 --compile --num-samples 3"
     file="cpu_compile_b16.txt"
@@ -144,7 +158,7 @@
 # Cuda AOTI
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-if [ "$RUN_CUDA_AOTI" = "true" ]; then
+if [ $RUN_CUDA_AOTI -eq 1 ]; then
     echo "Cuda aoti b16"
     compile_cmd="python3 torchchat.py export $model --quantize '{\"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cuda\"}}' --output-dso-path /tmp/model16.so"
     generate_cmd="python3 torchchat.py generate $model --dso-path /tmp/model16.so --prompt \"Once upon a time,\" --max-new-tokens 200 --device cuda --num-samples 3"
@@ -165,11 +179,36 @@ if [ "$RUN_CUDA_AOTI" = "true" ]; then
 fi
 
 
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# Cuda AOTI PT2
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
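+# PT2 path: export writes the ahead-of-time compiled model as a packaged .pt2
+# artifact that generate loads back, instead of the bare .so used above.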
+if [ $RUN_CUDA_AOTI_PT2 -eq 1 ]; then
+    echo "Cuda aoti PT2 b16"
+    compile_cmd="python3 torchchat.py export $model --quantize '{\"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cuda\"}}' --output-aoti-package-path /tmp/model16.pt2"
+    generate_cmd="python3 torchchat.py generate $model --aoti-package-path /tmp/model16.pt2 --prompt \"Once upon a time,\" --max-new-tokens 200 --device cuda --num-samples 3"
+    file="cuda_aoti_pt2_b16.txt"
+    formatted_export_and_generate "$file" "$generate_cmd" "$compile_cmd"
+
+    echo "Cuda aoti PT2 int8"
+    compile_cmd="python3 torchchat.py export $model --quantize '{\"linear:int8\": {\"groupsize\": 0}, \"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cuda\"}}' --output-aoti-package-path /tmp/model8.pt2"
+    generate_cmd="python3 torchchat.py generate $model --aoti-package-path /tmp/model8.pt2 --prompt \"Once upon a time,\" --max-new-tokens 200 --device cuda --num-samples 3"
+    file="cuda_aoti_pt2_8.txt"
+    formatted_export_and_generate "$file" "$generate_cmd" "$compile_cmd"
+
+    echo "Cuda aoti PT2 int4"
+    compile_cmd="python3 torchchat.py export $model --quantize '{\"linear:int4\": {\"groupsize\": 256}, \"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cuda\"}}' --output-aoti-package-path /tmp/model34.pt2"
+    generate_cmd="python3 torchchat.py generate $model --aoti-package-path /tmp/model34.pt2 --prompt \"Once upon a time,\" --max-new-tokens 200 --device cuda --num-samples 3"
+    file="cuda_aoti_pt2_4.txt"
+    formatted_export_and_generate "$file" "$generate_cmd" "$compile_cmd"
+fi
+
+
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # CPU AOTI
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-if [ "$RUN_CPU_AOTI" = "true" ]; then
+if [ $RUN_CPU_AOTI -eq 1 ]; then
     echo "CPU aoti b16"
     compile_cmd="python3 torchchat.py export $model --quantize '{\"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cpu\"}}' --output-dso-path /tmp/model16.so"
     generate_cmd="python3 torchchat.py generate $model --dso-path /tmp/model16.so --prompt \"Once upon a time,\" --max-new-tokens 256 --device cpu --num-samples 3"
@@ -188,3 +227,28 @@ if [ "$RUN_CPU_AOTI" = "true" ]; then
     file="cpu_aoti_4.txt"
     formatted_export_and_generate "$file" "$generate_cmd" "$compile_cmd"
 fi
+
+
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# CPU AOTI PT2
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+if [ $RUN_CPU_AOTI_PT2 -eq 1 ]; then
+    echo "CPU aoti PT2 b16"
+    compile_cmd="python3 torchchat.py export $model --quantize '{\"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cpu\"}}' --output-pt2-package-path /tmp/model16.pt2"
+    generate_cmd="python3 torchchat.py generate $model --pt2-package-path /tmp/model16.pt2 --prompt \"Once upon a time,\" --max-new-tokens 256 --device cpu --num-samples 3"
+    file="cpu_aoti_pt2_b16.txt"
+    formatted_export_and_generate "$file" "$generate_cmd" "$compile_cmd"
+
+    echo "CPU aoti PT2 int8"
+    compile_cmd="python3 torchchat.py export $model --quantize '{\"linear:int8\": {\"groupsize\": 0}, \"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cpu\"}}' --output-pt2-package-path /tmp/model8.pt2"
+    generate_cmd="python3 torchchat.py generate $model --pt2-package-path /tmp/model8.pt2 --prompt \"Once upon a time,\" --max-new-tokens 256 --device cpu --num-samples 3"
+    file="cpu_aoti_pt2_8.txt"
+    formatted_export_and_generate "$file" "$generate_cmd" "$compile_cmd"
+
+    echo "CPU aoti PT2 int4"
+    compile_cmd="python3 torchchat.py export $model --quantize '{\"linear:int4\": {\"groupsize\": 256}, \"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cpu\"}}' --output-pt2-package-path /tmp/model34.pt2"
+    generate_cmd="python3 torchchat.py generate $model --pt2-package-path /tmp/model34.pt2 --prompt \"Once upon a time,\" --max-new-tokens 256 --device cpu --num-samples 3"
+    file="cpu_aoti_pt2_4.txt"
+    formatted_export_and_generate "$file" "$generate_cmd" "$compile_cmd"
+fi
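+
+# Example invocation (the script name and model alias are illustrative):
+#   bash run_benchmarks.sh llama3.1 ./bench_out
+# Set DRY_RUN=1 at the top to preview every command without running it.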