File tree — 6 files changed: +6 -6 lines changed
engine-qwen-2-5-14b-coder-instruct
engine-qwen-2-5-14b-instruct
engine-qwen-2-5-32b-coder-instruct
engine-qwen-2-5-32b-instruct
engine-qwen-2-5-72b-instruct
engine-qwen-2-5-72b-math-instruct

6 files changed: +6 -6 lines changed

Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ trt_llm:
42
42
tensor_parallel_count : 1
43
43
plugin_configuration :
44
44
use_paged_context_fmha : true
45
- use_fp8_context_fmha : true
45
+ use_fp8_context_fmha : false
46
46
paged_kv_cache : true
47
47
runtime :
48
48
batch_scheduler_policy : max_utilization
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ trt_llm:
36
36
tensor_parallel_count : 1
37
37
plugin_configuration :
38
38
use_paged_context_fmha : true
39
- use_fp8_context_fmha : true
39
+ use_fp8_context_fmha : false
40
40
paged_kv_cache : true
41
41
runtime :
42
42
batch_scheduler_policy : max_utilization
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ trt_llm:
42
42
tensor_parallel_count : 1
43
43
plugin_configuration :
44
44
use_paged_context_fmha : true
45
- use_fp8_context_fmha : true
45
+ use_fp8_context_fmha : false
46
46
paged_kv_cache : true
47
47
runtime :
48
48
batch_scheduler_policy : max_utilization
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ trt_llm:
42
42
tensor_parallel_count : 1
43
43
plugin_configuration :
44
44
use_paged_context_fmha : true
45
- use_fp8_context_fmha : true
45
+ use_fp8_context_fmha : false
46
46
paged_kv_cache : true
47
47
runtime :
48
48
batch_scheduler_policy : max_utilization
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ trt_llm:
42
42
tensor_parallel_count : 2
43
43
plugin_configuration :
44
44
use_paged_context_fmha : true
45
- use_fp8_context_fmha : true
45
+ use_fp8_context_fmha : false
46
46
paged_kv_cache : true
47
47
runtime :
48
48
batch_scheduler_policy : max_utilization
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ trt_llm:
42
42
tensor_parallel_count : 2
43
43
plugin_configuration :
44
44
use_paged_context_fmha : true
45
- use_fp8_context_fmha : true
45
+ use_fp8_context_fmha : false
46
46
paged_kv_cache : true
47
47
runtime :
48
48
batch_scheduler_policy : max_utilization
You can’t perform that action at this time.
0 commit comments