+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# Customize what is being run
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-RUN_CUDA_EAGER=true
-RUN_CUDA_COMPILE=false
-RUN_CUDA_AOTI=false
+DRY_RUN=0
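+# Set DRY_RUN=1 to have the script print and log each command without executing it.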
 
-RUN_CPU_EAGER=true
-RUN_CPU_COMPILE=false
-RUN_CPU_AOTI=false
+RUN_CUDA_EAGER=1
+RUN_CUDA_COMPILE=1
+RUN_CUDA_AOTI=1
+RUN_CUDA_AOTI_PT2=1
+
+RUN_CPU_EAGER=1
+RUN_CPU_COMPILE=1
+RUN_CPU_AOTI=1
+RUN_CPU_AOTI_PT2=1
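+# Each RUN_* flag set to 1 enables the matching benchmark section below; 0 skips it.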
 
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # Check and Set Up Args (model, out_directory)
@@ -25,10 +32,13 @@ mkdir -p $dir
 # Helpers
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
+# Env Variables for Running Commands
+ENV_VARIABLE="OMP_NUM_THREADS=16 numactl --cpunodebind=0 --membind=0"
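+# numactl binds execution and memory to NUMA node 0 and OMP_NUM_THREADS caps the
+# thread count, so CPU runs get a consistent core and memory placement.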
+
 # Function for printing and writing to files
 function formatted_export_and_generate {
     local file="$dir/$1"
-    local generate_cmd="$2"
+    local generate_cmd="${ENV_VARIABLE} $2"
     local compile_cmd="$3"
 
     # Write Commands to the top of the output file
@@ -41,13 +51,17 @@ function formatted_export_and_generate {
     if [ ! -z "$compile_cmd" ]; then
         echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" >> $file
         echo "$compile_cmd" | tee -a $file
-        eval $compile_cmd &>> $file
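+        # Guard execution for dry runs; &>> appends both stdout and stderr to the log file.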
+        if [ $DRY_RUN -eq 0 ]; then
+            eval $compile_cmd &>> $file
+        fi
     fi
 
     # Generate using the Model
     echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" >> $file
     echo $generate_cmd | tee -a $file
-    eval $generate_cmd &>> $file
+    if [ $DRY_RUN -eq 0 ]; then
+        eval $generate_cmd &>> $file
+    fi
     echo
 }
 
@@ -56,7 +70,7 @@ function formatted_export_and_generate {
 # Cuda eager
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-if [ "$RUN_CUDA_EAGER" = "true" ]; then
+if [ $RUN_CUDA_EAGER -eq 1 ]; then
     echo "Cuda eager b16"
     generate_cmd="python3 torchchat.py generate $model --quantize '{\"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cuda\"}}' --prompt \"Once upon a time,\" --max-new-tokens 200 --num-samples 3"
     file="cuda_eager_b16.txt"
@@ -78,7 +92,7 @@
 # Cuda compile
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-if [ "$RUN_CUDA_COMPILE" = "true" ]; then
+if [ $RUN_CUDA_COMPILE -eq 1 ]; then
     echo "Cuda compile b16"
     generate_cmd="python3 torchchat.py generate $model --quantize '{\"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cuda\"}}' --prompt \"Once upon a time,\" --max-new-tokens 200 --compile --num-samples 3"
     file="cuda_compile_b16.txt"
@@ -100,7 +114,7 @@
 # CPU eager
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-if [ "$RUN_CPU_EAGER" = "true" ]; then
+if [ $RUN_CPU_EAGER -eq 1 ]; then
     echo "CPU eager b16"
     generate_cmd="python3 torchchat.py generate $model --quantize '{\"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cpu\"}}' --prompt \"Once upon a time,\" --max-new-tokens 256 --num-samples 3"
     file="cpu_eager_b16.txt"
@@ -122,7 +136,7 @@
 # CPU compile
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-if [ "$RUN_CPU_COMPILE" = "true" ]; then
+if [ $RUN_CPU_COMPILE -eq 1 ]; then
     echo "CPU compile b16"
     generate_cmd="python3 torchchat.py generate $model --quantize '{\"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cpu\"}}' --prompt \"Once upon a time,\" --max-new-tokens 256 --compile --num-samples 3"
     file="cpu_compile_b16.txt"
@@ -144,7 +158,7 @@
 # Cuda AOTI
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-if [ "$RUN_CUDA_AOTI" = "true" ]; then
+if [ $RUN_CUDA_AOTI -eq 1 ]; then
     echo "Cuda aoti b16"
     compile_cmd="python3 torchchat.py export $model --quantize '{\"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cuda\"}}' --output-dso-path /tmp/model16.so"
     generate_cmd="python3 torchchat.py generate $model --dso-path /tmp/model16.so --prompt \"Once upon a time,\" --max-new-tokens 200 --device cuda --num-samples 3"
@@ -165,11 +179,36 @@ if [ "$RUN_CUDA_AOTI" = "true" ]; then
 fi
 
 
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# Cuda AOTI PT2
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
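+# PT2 path: export writes the ahead-of-time compiled model as a packaged .pt2
+# artifact that generate loads back, instead of the bare .so used above.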
+if [ $RUN_CUDA_AOTI_PT2 -eq 1 ]; then
+    echo "Cuda aoti PT2 b16"
+    compile_cmd="python3 torchchat.py export $model --quantize '{\"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cuda\"}}' --output-aoti-package-path /tmp/model16.pt2"
+    generate_cmd="python3 torchchat.py generate $model --aoti-package-path /tmp/model16.pt2 --prompt \"Once upon a time,\" --max-new-tokens 200 --device cuda --num-samples 3"
+    file="cuda_aoti_pt2_b16.txt"
+    formatted_export_and_generate "$file" "$generate_cmd" "$compile_cmd"
+
+    echo "Cuda aoti PT2 int8"
+    compile_cmd="python3 torchchat.py export $model --quantize '{\"linear:int8\": {\"groupsize\": 0}, \"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cuda\"}}' --output-aoti-package-path /tmp/model8.pt2"
+    generate_cmd="python3 torchchat.py generate $model --aoti-package-path /tmp/model8.pt2 --prompt \"Once upon a time,\" --max-new-tokens 200 --device cuda --num-samples 3"
+    file="cuda_aoti_pt2_8.txt"
+    formatted_export_and_generate "$file" "$generate_cmd" "$compile_cmd"
+
+    echo "Cuda aoti PT2 int4"
+    compile_cmd="python3 torchchat.py export $model --quantize '{\"linear:int4\": {\"groupsize\": 256}, \"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cuda\"}}' --output-aoti-package-path /tmp/model34.pt2"
+    generate_cmd="python3 torchchat.py generate $model --aoti-package-path /tmp/model34.pt2 --prompt \"Once upon a time,\" --max-new-tokens 200 --device cuda --num-samples 3"
+    file="cuda_aoti_pt2_4.txt"
+    formatted_export_and_generate "$file" "$generate_cmd" "$compile_cmd"
+fi
+
+
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # CPU AOTI
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-if [ "$RUN_CPU_AOTI" = "true" ]; then
+if [ $RUN_CPU_AOTI -eq 1 ]; then
     echo "CPU aoti b16"
     compile_cmd="python3 torchchat.py export $model --quantize '{\"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cpu\"}}' --output-dso-path /tmp/model16.so"
     generate_cmd="python3 torchchat.py generate $model --dso-path /tmp/model16.so --prompt \"Once upon a time,\" --max-new-tokens 256 --device cpu --num-samples 3"
@@ -188,3 +227,28 @@ if [ "$RUN_CPU_AOTI" = "true" ]; then
     file="cpu_aoti_4.txt"
     formatted_export_and_generate "$file" "$generate_cmd" "$compile_cmd"
 fi
+
+
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# CPU AOTI PT2
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+if [ $RUN_CPU_AOTI_PT2 -eq 1 ]; then
+    echo "CPU aoti PT2 b16"
+    compile_cmd="python3 torchchat.py export $model --quantize '{\"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cpu\"}}' --output-pt2-package-path /tmp/model16.pt2"
+    generate_cmd="python3 torchchat.py generate $model --pt2-package-path /tmp/model16.pt2 --prompt \"Once upon a time,\" --max-new-tokens 256 --device cpu --num-samples 3"
+    file="cpu_aoti_pt2_b16.txt"
+    formatted_export_and_generate "$file" "$generate_cmd" "$compile_cmd"
+
+    echo "CPU aoti PT2 int8"
+    compile_cmd="python3 torchchat.py export $model --quantize '{\"linear:int8\": {\"groupsize\": 0}, \"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cpu\"}}' --output-pt2-package-path /tmp/model8.pt2"
+    generate_cmd="python3 torchchat.py generate $model --pt2-package-path /tmp/model8.pt2 --prompt \"Once upon a time,\" --max-new-tokens 256 --device cpu --num-samples 3"
+    file="cpu_aoti_pt2_8.txt"
+    formatted_export_and_generate "$file" "$generate_cmd" "$compile_cmd"
+
+    echo "CPU aoti PT2 int4"
+    compile_cmd="python3 torchchat.py export $model --quantize '{\"linear:int4\": {\"groupsize\": 256}, \"precision\": {\"dtype\":\"bfloat16\"}, \"executor\":{\"accelerator\":\"cpu\"}}' --output-pt2-package-path /tmp/model34.pt2"
+    generate_cmd="python3 torchchat.py generate $model --pt2-package-path /tmp/model34.pt2 --prompt \"Once upon a time,\" --max-new-tokens 256 --device cpu --num-samples 3"
+    file="cpu_aoti_pt2_4.txt"
+    formatted_export_and_generate "$file" "$generate_cmd" "$compile_cmd"
+fi
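+
+# Example invocation (the script name and model alias are illustrative):
+#   bash run_benchmarks.sh llama3.1 ./bench_out
+# Set DRY_RUN=1 at the top to preview every command without running it.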