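doc: update full script (run.sh now makes a single bigcodebench.evaluate call in place of the separate generate, sanitize, and evaluate steps)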

terryyz · terryyz · commit 01e2fb08e33f · 2024-10-05T06:03:21.000+08:00
diff --git a/run.sh b/run.sh
@@ -1,39 +1,14 @@
-BS=5
 DATASET=bigcodebench
-MODEL=gpt-3.5-turbo-0125
-BACKEND=openai
-TEMP=0
-N_SAMPLES=1
+MODEL=meta-llama/Llama-3.2-1B-Instruct
+BACKEND=vllm
 NUM_GPU=1
 SPLIT=complete
 SUBSET=hard
-if [[ $MODEL == *"/"* ]]; then
-  ORG=$(echo $MODEL | cut -d'/' -f1)--
-  BASE_MODEL=$(echo $MODEL | cut -d'/' -f2)
-else
-  ORG=""
-  BASE_MODEL=$MODEL
-fi
 
-if [ "$SUBSET" = "full" ]; then
-    FILE_HEADER="${ORG}${BASE_MODEL}--${DATASET}-${SPLIT}--${BACKEND}-${TEMP}-${N_SAMPLES}"
-  else
-    FILE_HEADER="${ORG}${BASE_MODEL}--${DATASET}-${SUBSET}-${SPLIT}--${BACKEND}-${TEMP}-${N_SAMPLES}"
-  fi
-
-echo $FILE_HEADER
-bigcodebench.generate \
+bigcodebench.evaluate \
   --model $MODEL \
-  --resume \
+  --samples meta-llama--Llama-3.2-1B-Instruct--bigcodebench-hard-complete--vllm-0-1-sanitized_calibrated.jsonl \
   --split $SPLIT \
   --subset $SUBSET \
   --backend $BACKEND \
-  --greedy
-
-bigcodebench.sanitize --samples $FILE_HEADER.jsonl --calibrate
-
-# Check if the ground truth works on your machine
-bigcodebench.evaluate --split $SPLIT --subset $SUBSET --samples $FILE_HEADER-sanitized-calibrated.jsonl
-
-# If the execution is slow:
-bigcodebench.evaluate --split $SPLIT --subset $SUBSET --samples $FILE_HEADER-sanitized-calibrated.jsonl --parallel 32
+  --greedy