
Commit 4f98fe0

Update perf number
1 parent ca10839 commit 4f98fe0

File tree

2 files changed: +3 −3 lines changed


mixtral-moe/README.md

Lines changed: 2 additions & 2 deletions
@@ -14,8 +14,8 @@ Benchmarks run on an 8xA100-80GB, power limited to 330W with a hybrid cube mesh
 
 |                  |   1 GPU |     2 GPU |  4 GPU |      8 GPU |
 |------------------|---------|-----------|--------|------------|
-|baseline(bfloat16)| OOM     |     96.67 | 125.70 |     227.82 |
-| int8             |   97.92 |    155.03 | 162.58 |     279.35 |
+|baseline(bfloat16)| OOM     |     96.67 | 155.35 |     227.82 |
+| int8             |   97.92 |    155.03 | 216.87 |     279.35 |
 
 
 ## Generate Text

mixtral-moe/generate.py

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@ def device_sync(device):
 
 torch._inductor.config.coordinate_descent_tuning = True
 torch._inductor.config.triton.unique_kernel_names = True
-# torch._inductor.config.fx_graph_cache = True # Experimental feature to reduce compilation times, will be on by default in future
+torch._inductor.config.fx_graph_cache = True # Experimental feature to reduce compilation times, will be on by default in future
 
 
 # support running without installing as a package
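
For context on the generate.py change (not part of this commit): `torch._inductor.config.fx_graph_cache` enables Inductor's FX graph cache, so compiled-graph artifacts can be reused across process runs instead of being recompiled each time. Below is a minimal sketch, assuming a PyTorch build where this experimental flag exists; `toy_model` is a hypothetical stand-in for the model compiled in generate.py.

```python
# Minimal sketch: enable the FX graph cache before calling torch.compile.
# Assumes torch._inductor.config.fx_graph_cache is available (experimental
# at the time of this commit). toy_model is a hypothetical placeholder.
import torch
import torch._inductor.config as inductor_config

inductor_config.fx_graph_cache = True  # same flag this commit uncomments

def toy_model(x):
    # tiny placeholder for the model forward pass compiled in generate.py
    return torch.nn.functional.relu(x @ x.T)

compiled = torch.compile(toy_model)
x = torch.randn(64, 64)
compiled(x)  # first call compiles and writes artifacts to the cache;
# a later process compiling the same graph can reuse them and skip recompilation
```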
