cache-dit/bench/bench_fast.sh at ascend-patch · zhangtao0408/cache-dit · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
export FLUX_DIR="${FLUX_DIR:-$HF_MODELS/FLUX.1-dev}"
export CLIP_MODEL_DIR="${CLIP_MODEL_DIR:-$HF_MODELS/cache-dit-eval/CLIP-ViT-g-14-laion2B-s12B-b42K}"
export IMAGEREWARD_MODEL_DIR="${IMAGEREWARD_MODEL_DIR:-$HF_MODELS/cache-dit-eval/ImageReward}"

function run_flux_draw_bench_fast() {
  local test_num=200
  local save_dir="./tmp/DrawBench200_DBCache_Fast"
  local base_params="--test-num ${test_num} --save-dir ${save_dir} --flops"

  rdt=0.8 # 0.64 0.8 1.0
  echo "Running residual diff threshold: ${rdt}, test_num: ${test_num}"
  python3 bench.py ${base_params} # baseline
  python3 bench.py ${base_params} --cache --Fn 1 --Bn 0 --max-warmup-steps 8  --warmup-interval 2 --rdt ${rdt} --mcc 10
  python3 bench.py ${base_params} --cache --Fn 1 --Bn 0 --max-warmup-steps 8  --warmup-interval 2 --rdt ${rdt} --mcc 8
  python3 bench.py ${base_params} --cache --Fn 1 --Bn 0 --max-warmup-steps 8  --warmup-interval 4 --rdt ${rdt} --mcc 10
  python3 bench.py ${base_params} --cache --Fn 1 --Bn 0 --max-warmup-steps 8  --warmup-interval 4 --rdt ${rdt} --mcc 8
  python3 bench.py ${base_params} --cache --Fn 1 --Bn 0 --max-warmup-steps 16 --warmup-interval 4 --rdt ${rdt} --mcc 10
  python3 bench.py ${base_params} --cache --Fn 1 --Bn 0 --max-warmup-steps 16 --warmup-interval 4 --rdt ${rdt} --mcc 8
}


function run_flux_draw_bench_with_taylorseer_fast() {
  local taylorseer_params="--taylorseer --order 1"
  local test_num=200
  local save_dir="./tmp/DrawBench200_DBCache_TaylorSeer_Fast"
  local base_params="--test-num ${test_num} --save-dir ${save_dir} --flops"

  rdt=0.8 # 0.64 0.8 1.0
  echo "Running residual diff threshold: ${rdt}, test_num: ${test_num}"
  python3 bench.py ${base_params} # baseline
  python3 bench.py ${base_params} --cache --Fn 1 --Bn 0 --max-warmup-steps 8  --warmup-interval 2 --rdt ${rdt} --mcc 10 ${taylorseer_params}
  python3 bench.py ${base_params} --cache --Fn 1 --Bn 0 --max-warmup-steps 8  --warmup-interval 2 --rdt ${rdt} --mcc 8  ${taylorseer_params}
  python3 bench.py ${base_params} --cache --Fn 1 --Bn 0 --max-warmup-steps 8  --warmup-interval 4 --rdt ${rdt} --mcc 10 ${taylorseer_params}
  python3 bench.py ${base_params} --cache --Fn 1 --Bn 0 --max-warmup-steps 8  --warmup-interval 4 --rdt ${rdt} --mcc 8  ${taylorseer_params}
  python3 bench.py ${base_params} --cache --Fn 1 --Bn 0 --max-warmup-steps 16 --warmup-interval 4 --rdt ${rdt} --mcc 10 ${taylorseer_params}
  python3 bench.py ${base_params} --cache --Fn 1 --Bn 0 --max-warmup-steps 16 --warmup-interval 4 --rdt ${rdt} --mcc 8  ${taylorseer_params}
}

function run_flux_draw_bench_with_taylorseer_fast_O2() {
  local taylorseer_params="--taylorseer --order 2"
  local test_num=200
  local save_dir="./tmp/DrawBench200_DBCache_TaylorSeer_Fast_O2"
  local base_params="--test-num ${test_num} --save-dir ${save_dir} --flops"

  rdt=0.8 # 0.64 0.8 1.0
  echo "Running residual diff threshold: ${rdt}, test_num: ${test_num}"
  python3 bench.py ${base_params} # baseline
  python3 bench.py ${base_params} --cache --Fn 1 --Bn 0 --max-warmup-steps 8  --warmup-interval 2 --rdt ${rdt} --mcc 10 ${taylorseer_params}
  python3 bench.py ${base_params} --cache --Fn 1 --Bn 0 --max-warmup-steps 8  --warmup-interval 2 --rdt ${rdt} --mcc 8  ${taylorseer_params}
  python3 bench.py ${base_params} --cache --Fn 1 --Bn 0 --max-warmup-steps 8  --warmup-interval 4 --rdt ${rdt} --mcc 10 ${taylorseer_params}
  python3 bench.py ${base_params} --cache --Fn 1 --Bn 0 --max-warmup-steps 8  --warmup-interval 4 --rdt ${rdt} --mcc 8  ${taylorseer_params}
  python3 bench.py ${base_params} --cache --Fn 1 --Bn 0 --max-warmup-steps 16 --warmup-interval 4 --rdt ${rdt} --mcc 10 ${taylorseer_params}
  python3 bench.py ${base_params} --cache --Fn 1 --Bn 0 --max-warmup-steps 16 --warmup-interval 4 --rdt ${rdt} --mcc 8  ${taylorseer_params}
}

bench_type=$1

if [[ "${bench_type}" == "taylorseer" ]]; then
  echo "bench_type: ${bench_type}, DBCache Fast + TaylorSeer"
  run_flux_draw_bench_with_taylorseer_fast
elif [[ "${bench_type}" == "taylorseer_O2" ]]; then
  echo "bench_type: ${bench_type}, DBCache Fast + TaylorSeer O(2)"
  run_flux_draw_bench_with_taylorseer_fast_O2
else
  echo "bench_type: ${bench_type}, DBCache Fast"
  run_flux_draw_bench_fast
fi

# export CUDA_VISIBLE_DEVICES=0 && nohup bash bench_fast.sh default > log/cache_dit_bench_fast.log 2>&1 &
# export CUDA_VISIBLE_DEVICES=1 && nohup bash bench_fast.sh taylorseer > log/cache_dit_bench_taylorseer_fast.log 2>&1 &
# export CUDA_VISIBLE_DEVICES=2 && nohup bash bench_fast.sh taylorseer_O2 > log/cache_dit_bench_taylorseer_fast_O2.log 2>&1 &