Skip to content

Commit 9489b8d

Browse files
author
EC2 Default User
committed
add map of valid optimization combinations
1 parent fcb5092 commit 9489b8d

File tree

1 file changed

+38
-0
lines changed

1 file changed

+38
-0
lines changed
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
# Map of valid optimization combinations, keyed by optimization target
# ("trt", "vllm", "neuron"). Every entry has the same shape:
#   supported_instance_families: list of instance family names (strings)
#   compilation:                 bool flag
#   quantization:                dict of quantization method name -> bool flag
#                                (awq, fp8, gptq, smooth_quant)
#   speculative_decoding:        bool flag
#   sharding:                    bool flag
# NOTE(review): a True flag presumably means the technique may be used with
# that target and False means it may not — confirm against the code that
# consumes this map.
SUPPORTED_OPTIMIZATION_CONFIGURATIONS = {
    "trt": {
        "supported_instance_families": ["p4d", "p4de", "p5", "g5", "g6"],
        "compilation": True,
        "quantization": {
            "awq": True,
            "fp8": True,
            "gptq": False,
            "smooth_quant": True
        },
        "speculative_decoding": False,
        "sharding": False
    },
    "vllm": {
        "supported_instance_families": ["p4d", "p4de", "p5", "g5", "g6"],
        "compilation": False,
        "quantization": {
            "awq": True,
            "fp8": True,
            "gptq": False,
            "smooth_quant": False
        },
        "speculative_decoding": True,
        "sharding": True
    },
    "neuron": {
        "supported_instance_families": ["inf2", "trn1", "trn1n"],
        "compilation": True,
        # Every quantization method is flagged False for this target.
        "quantization": {
            "awq": False,
            "fp8": False,
            "gptq": False,
            "smooth_quant": False
        },
        "speculative_decoding": False,
        "sharding": False
    }
}

0 commit comments

Comments
 (0)