@@ -1,12 +1,18 @@
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# Customize what is being run
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-RUN_MPS_EAGER=false
+DRY_RUN=0
 
-RUN_CPU_EAGER=true
-RUN_CPU_COMPILE=false
-RUN_CPU_AOTI=false
+RUN_MPS_EAGER=0
+
+RUN_CPU_EAGER=0
+RUN_CPU_COMPILE=0
+RUN_CPU_AOTI=0
+RUN_CPU_AOTI_PT2=0
 
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-# Check and Set Up Args (model, out_directory)
+# Check and Set Up Args (model, out_directory)
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 if [ $# -ne 2 ]; then
     echo "Please provide (1) model and (2) directory as positional arguments"
@@ -20,7 +26,7 @@ mkdir -p $dir
 
 
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-# Helpers
+# Helpers
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 # Function for printing and writing to files
@@ -39,13 +45,17 @@ function formatted_export_and_generate {
     if [ ! -z "$compile_cmd" ]; then
         echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" >> $file
         echo "$compile_cmd" | tee -a $file
-        eval $compile_cmd >> $file 2>&1
+        if [ $DRY_RUN -eq 0 ]; then
+            eval $compile_cmd >> $file 2>&1
+        fi
     fi
 
     # Generate using the Model
     echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" >> $file
     echo $generate_cmd | tee -a $file
-    eval $generate_cmd >> $file 2>&1
+    if [ $DRY_RUN -eq 0 ]; then
+        eval $generate_cmd >> $file 2>&1
+    fi
     echo
 }
 
@@ -54,14 +64,14 @@ function formatted_export_and_generate {
 # MPS Eager
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-if [ "$RUN_MPS_EAGER" = "true" ]; then
+if [ $RUN_MPS_EAGER -eq 1 ]; then
     echo "MPS Eager 16"
     generate_cmd="python3 torchchat.py generate $model --quantize '{\"precision\": {\"dtype\":\"float16\"}, \"executor\":{\"accelerator\":\"mps\"}}' --prompt \"Once upon a time,\" --max-new-tokens 256 --num-samples 3"
     file="mps_eager_16.txt"
     formatted_export_and_generate "$file" "$generate_cmd"
 
     echo "MPS Eager int8"
-    generate_cmd="python3 torchchat.py generate $model --quantize '{\"linear:int8\": {\"groupsize\": 0}, \"precision\": {\"dtype\":\"float16\"}, \"executor\":{\"accelerator\":\"mps\"}}' --prompt \"Once upon a time,\" --max-new-tokens 256 --num-samples 3"
+    generate_cmd="python3 torchchat.py generate $model --quantize '{\"linear:int8\": {\"groupsize\": 0}, \"precision\": {\"dtype\":\"float16\"}, \"executor\":{\"accelerator\":\"mps\"}}' --prompt \"Once upon a time,\" --max-new-tokens 256 --num-samples 3"
     file="mps_eager_8.txt"
     formatted_export_and_generate "$file" "$generate_cmd"
 
@@ -76,14 +86,14 @@
 # CPU Eager
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-if [ "$RUN_CPU_EAGER" = "true" ]; then
+if [ $RUN_CPU_EAGER -eq 1 ]; then
     echo "CPU Eager 16"
     generate_cmd="python3 torchchat.py generate $model --quantize '{\"precision\": {\"dtype\":\"float16\"}, \"executor\":{\"accelerator\":\"cpu\"}}' --prompt \"Once upon a time,\" --max-new-tokens 256 --num-samples 3"
     file="cpu_eager_16.txt"
     formatted_export_and_generate "$file" "$generate_cmd"
 
     echo "CPU Eager int8"
-    generate_cmd="python3 torchchat.py generate $model --quantize '{\"linear:int8\": {\"groupsize\": 0}, \"precision\": {\"dtype\":\"float16\"}, \"executor\":{\"accelerator\":\"cpu\"}}' --prompt \"Once upon a time,\" --max-new-tokens 256 --num-samples 3"
+    generate_cmd="python3 torchchat.py generate $model --quantize '{\"linear:int8\": {\"groupsize\": 0}, \"precision\": {\"dtype\":\"float16\"}, \"executor\":{\"accelerator\":\"cpu\"}}' --prompt \"Once upon a time,\" --max-new-tokens 256 --num-samples 3"
     file="cpu_eager_8.txt"
     formatted_export_and_generate "$file" "$generate_cmd"
 
@@ -97,17 +107,17 @@
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # CPU compile
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-if [ "$RUN_CPU_COMPILE" = "true" ]; then
+if [ $RUN_CPU_COMPILE -eq 1 ]; then
     echo "CPU compile b16"
     generate_cmd="python3 torchchat.py generate $model --quantize '{\"precision\": {\"dtype\":\"float16\"}, \"executor\":{\"accelerator\":\"cpu\"}}' --prompt \"Once upon a time,\" --max-new-tokens 256 --compile --num-samples 3"
     file="cpu_compile_b16.txt"
     formatted_export_and_generate "$file" "$generate_cmd"
-
+
     echo "CPU compile int8"
-    generate_cmd="python3 torchchat.py generate $model --quantize '{\"linear:int8\": {\"groupsize\": 0}, \"precision\": {\"dtype\":\"float16\"}, \"executor\":{\"accelerator\":\"cpu\"}}' --prompt \"Once upon a time,\" --max-new-tokens 256 --compile --num-samples 3"
+    generate_cmd="python3 torchchat.py generate $model --quantize '{\"linear:int8\": {\"groupsize\": 0}, \"precision\": {\"dtype\":\"float16\"}, \"executor\":{\"accelerator\":\"cpu\"}}' --prompt \"Once upon a time,\" --max-new-tokens 256 --compile --num-samples 3"
     file="cpu_compile_8.txt"
     formatted_export_and_generate "$file" "$generate_cmd"
-
+
     echo "CPU compile int4"
     generate_cmd="python3 torchchat.py generate $model --quantize '{\"linear:int4\": {\"groupsize\": 256}, \"precision\": {\"dtype\":\"float16\"}, \"executor\":{\"accelerator\":\"cpu\"}}' --prompt \"Once upon a time,\" --max-new-tokens 256 --compile --num-samples 3"
     file="cpu_compile_4.txt"
@@ -117,22 +127,47 @@
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # CPU AOTI
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-if [ "$RUN_CPU_AOTI" = "true" ]; then
+if [ $RUN_CPU_AOTI -eq 1 ]; then
     echo "CPU aoti b16"
-    compile_cmd="python3 torchchat.py export $model --quantize '{\"precision\": {\"dtype\":\"float16\"}, \"executor\":{\"accelerator\":\"cpu\"}}' --output-dso-path /tmp/model16.so"
-    generate_cmd="python3 torchchat.py generate $model --dso-path /tmp/model16.so --prompt \"Once upon a time,\" --max-new-tokens 256 --device cpu --num-samples 3"
-    file="cpu_aoti_b16.txt"
-    formatted_export_and_generate "$file" "$generate_cmd" "$compile_cmd"
-
+    compile_cmd="python3 torchchat.py export $model --quantize '{\"precision\": {\"dtype\":\"float16\"}, \"executor\":{\"accelerator\":\"cpu\"}}' --output-dso-path /tmp/model16.so"
+    generate_cmd="python3 torchchat.py generate $model --dso-path /tmp/model16.so --prompt \"Once upon a time,\" --max-new-tokens 256 --device cpu --num-samples 3"
+    file="cpu_aoti_16.txt"
+    formatted_export_and_generate "$file" "$generate_cmd" "$compile_cmd"
+
     echo "CPU aoti int8"
     compile_cmd="python3 torchchat.py export $model --quantize '{\"linear:int8\": {\"groupsize\": 0}, \"precision\": {\"dtype\":\"float16\"}, \"executor\":{\"accelerator\":\"cpu\"}}' --output-dso-path /tmp/model8.so"
     generate_cmd="python3 torchchat.py generate $model --dso-path /tmp/model8.so --prompt \"Once upon a time,\" --max-new-tokens 256 --device cpu --num-samples 3"
     file="cpu_aoti_8.txt"
-    formatted_export_and_generate "$file" "$generate_cmd" "$compile_cmd"
-
+    formatted_export_and_generate "$file" "$generate_cmd" "$compile_cmd"
+
     echo "CPU aoti int4"
     compile_cmd="python3 torchchat.py export $model --quantize '{\"linear:int4\": {\"groupsize\": 256}, \"precision\": {\"dtype\":\"float16\"}, \"executor\":{\"accelerator\":\"cpu\"}}' --output-dso-path /tmp/model34.so"
     generate_cmd="python3 torchchat.py generate $model --dso-path /tmp/model34.so --prompt \"Once upon a time,\" --max-new-tokens 256 --device cpu --num-samples 3"
     file="cpu_aoti_4.txt"
-    formatted_export_and_generate "$file" "$generate_cmd" "$compile_cmd"
+    formatted_export_and_generate "$file" "$generate_cmd" "$compile_cmd"
+fi
+
+
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# CPU AOTI PT2
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+if [ $RUN_CPU_AOTI_PT2 -eq 1 ]; then
+    echo "CPU aoti PT2 b16"
+    compile_cmd="python3 torchchat.py export $model --quantize '{\"precision\": {\"dtype\":\"float16\"}, \"executor\":{\"accelerator\":\"cpu\"}}' --output-aoti-package-path /tmp/model16.pt2"
+    generate_cmd="python3 torchchat.py generate $model --aoti-package-path /tmp/model16.pt2 --prompt \"Once upon a time,\" --max-new-tokens 256 --device cpu --num-samples 3"
+    file="cpu_aoti_pt2_16.txt"
+    formatted_export_and_generate "$file" "$generate_cmd" "$compile_cmd"
+
+    echo "CPU aoti PT2 int8"
+    compile_cmd="python3 torchchat.py export $model --quantize '{\"linear:int8\": {\"groupsize\": 0}, \"precision\": {\"dtype\":\"float16\"}, \"executor\":{\"accelerator\":\"cpu\"}}' --output-aoti-package-path /tmp/model8.pt2"
+    generate_cmd="python3 torchchat.py generate $model --aoti-package-path /tmp/model8.pt2 --prompt \"Once upon a time,\" --max-new-tokens 256 --device cpu --num-samples 3"
+    file="cpu_aoti_pt2_8.txt"
+    formatted_export_and_generate "$file" "$generate_cmd" "$compile_cmd"
+
+    echo "CPU aoti PT2 int4"
+    compile_cmd="python3 torchchat.py export $model --quantize '{\"linear:int4\": {\"groupsize\": 256}, \"precision\": {\"dtype\":\"float16\"}, \"executor\":{\"accelerator\":\"cpu\"}}' --output-aoti-package-path /tmp/model34.pt2"
+    generate_cmd="python3 torchchat.py generate $model --aoti-package-path /tmp/model34.pt2 --prompt \"Once upon a time,\" --max-new-tokens 256 --device cpu --num-samples 3"
+    file="cpu_aoti_pt2_4.txt"
+    formatted_export_and_generate "$file" "$generate_cmd" "$compile_cmd"
 fi
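
After this change, each benchmark family is gated by an integer toggle (RUN_*=0/1) instead of a "true"/"false" string comparison, and setting DRY_RUN=1 makes the helper log the export/generate commands to the per-run output files without eval-ing them. A minimal sketch of how the updated script might be invoked, assuming it is saved as run_benchmarks.sh (the file name and model name are illustrative, not from the diff; the script itself only checks for two positional arguments, a model and an output directory):

    # Edit the toggles at the top of the script to choose what runs,
    # e.g. RUN_CPU_EAGER=1, then pass (1) a model and (2) a directory:
    bash run_benchmarks.sh llama3 ./bench_out

    # Set DRY_RUN=1 in the script first to record the commands in the
    # per-configuration .txt files without executing them.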