Skip to content

Commit 845d46e

Browse files
authored
moe quantization support int8 and fp8 (#702)
* moe quantization support int8 and fp8 * code formatting * dot dtype * support only fp8 fnuz types * quantize tensor fixed to quantize for correct types
1 parent 752d83c commit 845d46e

File tree

3 files changed

+327
-29
lines changed

3 files changed

+327
-29
lines changed
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
{
2+
"small_M": {
3+
"BLOCK_SIZE_M": 64,
4+
"BLOCK_SIZE_N": 64,
5+
"BLOCK_SIZE_K": 128,
6+
"GROUP_SIZE_M": 4,
7+
"num_warps": 8,
8+
"num_stages": 2,
9+
"waves_per_eu": 0,
10+
"matrix_instr_nonkdim": 16,
11+
"kpack": 2
12+
},
13+
"medium_M": {
14+
"BLOCK_SIZE_M": 128,
15+
"BLOCK_SIZE_N": 128,
16+
"BLOCK_SIZE_K": 128,
17+
"GROUP_SIZE_M": 1,
18+
"num_warps": 8,
19+
"num_stages": 2,
20+
"waves_per_eu": 0,
21+
"matrix_instr_nonkdim": 16,
22+
"kpack": 2
23+
},
24+
"large_M": {
25+
"BLOCK_SIZE_M": 128,
26+
"BLOCK_SIZE_N": 128,
27+
"BLOCK_SIZE_K": 128,
28+
"GROUP_SIZE_M": 1,
29+
"num_warps": 8,
30+
"num_stages": 2,
31+
"waves_per_eu": 0,
32+
"matrix_instr_nonkdim": 16,
33+
"kpack": 2
34+
}
35+
}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
{
2+
"small_M": {
3+
"BLOCK_SIZE_M": 32,
4+
"BLOCK_SIZE_N": 64,
5+
"BLOCK_SIZE_K": 128,
6+
"GROUP_SIZE_M": 1,
7+
"num_warps": 8,
8+
"num_stages": 2,
9+
"waves_per_eu": 0,
10+
"matrix_instr_nonkdim": 16,
11+
"kpack": 2
12+
},
13+
"medium_M": {
14+
"BLOCK_SIZE_M": 64,
15+
"BLOCK_SIZE_N": 128,
16+
"BLOCK_SIZE_K": 64,
17+
"GROUP_SIZE_M": 1,
18+
"num_warps": 8,
19+
"num_stages": 2,
20+
"waves_per_eu": 0,
21+
"matrix_instr_nonkdim": 16,
22+
"kpack": 2
23+
},
24+
"large_M": {
25+
"BLOCK_SIZE_M": 128,
26+
"BLOCK_SIZE_N": 256,
27+
"BLOCK_SIZE_K": 128,
28+
"GROUP_SIZE_M": 1,
29+
"num_warps": 8,
30+
"num_stages": 2,
31+
"waves_per_eu": 0,
32+
"matrix_instr_nonkdim": 16,
33+
"kpack": 2
34+
}
35+
}

0 commit comments

Comments
 (0)