Skip to content

Commit d7fdff4

Browse files
committed
update gemm block config
1 parent ba772f8 commit d7fdff4

File tree

20 files changed

+48
-54
lines changed

20 files changed

+48
-54
lines changed
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
{"1": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 1}, "2": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 32, "num_warps": 4, "num_stages": 1}, "4": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 1}, "8": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 1}, "16": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 3}, "24": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 3}, "32": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 3}, "48": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 128, "num_warps": 4, "num_stages": 3}, "64": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 3}, "96": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 3}, "128": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 128, "num_warps": 4, "num_stages": 3}, "256": {"BLOCK_M": 128, "BLOCK_N": 64, "BLOCK_K": 128, "GROUP_M": 32, "num_warps": 4, "num_stages": 3}, "512": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 2}, "1024": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 3}, "1536": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 128, "num_warps": 4, "num_stages": 2}, "2048": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 128, "num_warps": 4, "num_stages": 2}, "3072": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 32, "num_warps": 4, "num_stages": 3}, "4096": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 128, "num_warps": 4, "num_stages": 2}}
1+
{"1": {"BLOCK_M": 32, "BLOCK_N": 64, "BLOCK_K": 32, "GROUP_M": 8, "num_stages": 5, "num_warps": 2}, "2": {"BLOCK_M": 32, "BLOCK_N": 64, "BLOCK_K": 32, "GROUP_M": 8, "num_stages": 5, "num_warps": 2}, "4": {"BLOCK_M": 32, "BLOCK_N": 64, "BLOCK_K": 32, "GROUP_M": 8, "num_stages": 5, "num_warps": 2}, "8": {"BLOCK_M": 32, "BLOCK_N": 64, "BLOCK_K": 32, "GROUP_M": 8, "num_stages": 5, "num_warps": 2}, "16": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "24": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "32": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "48": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "64": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "96": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "128": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "256": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "512": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "1024": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "1536": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "2048": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "3072": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "4096": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}}
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
{"1": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 1}, "2": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 32, "num_warps": 4, "num_stages": 1}, "4": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 32, "num_warps": 4, "num_stages": 1}, "8": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 32, "num_warps": 4, "num_stages": 1}, "16": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 128, "num_warps": 4, "num_stages": 3}, "24": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 3}, "32": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 128, "num_warps": 4, "num_stages": 3}, "48": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 32, "num_warps": 4, "num_stages": 3}, "64": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 3}, "96": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 128, "num_warps": 4, "num_stages": 3}, "128": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 128, "num_warps": 4, "num_stages": 3}, "256": {"BLOCK_M": 128, "BLOCK_N": 64, "BLOCK_K": 128, "GROUP_M": 32, "num_warps": 4, "num_stages": 3}, "512": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 3}, "1024": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 3}, "1536": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 2}, "2048": {"BLOCK_M": 128, "BLOCK_N": 64, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 3}, "3072": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 128, "num_warps": 4, "num_stages": 3}, "4096": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 128, "num_warps": 4, "num_stages": 2}}
1+
{"1": {"BLOCK_M": 32, "BLOCK_N": 64, "BLOCK_K": 32, "GROUP_M": 8, "num_stages": 5, "num_warps": 2}, "2": {"BLOCK_M": 32, "BLOCK_N": 64, "BLOCK_K": 32, "GROUP_M": 8, "num_stages": 5, "num_warps": 2}, "4": {"BLOCK_M": 32, "BLOCK_N": 64, "BLOCK_K": 32, "GROUP_M": 8, "num_stages": 5, "num_warps": 2}, "8": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "16": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "24": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "32": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "48": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "64": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "96": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "128": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "256": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "512": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "1024": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "1536": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "2048": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "3072": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "4096": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}}
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
{"1": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 32, "num_warps": 4, "num_stages": 1}, "2": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 32, "num_warps": 4, "num_stages": 1}, "4": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 3}, "8": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 3}, "16": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 3}, "24": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 32, "num_warps": 4, "num_stages": 3}, "32": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 3}, "48": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 3}, "64": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 32, "num_warps": 4, "num_stages": 3}, "96": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 32, "num_warps": 4, "num_stages": 3}, "128": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 32, "num_warps": 4, "num_stages": 3}, "256": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 32, "num_warps": 4, "num_stages": 3}, "512": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 3}, "1024": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 128, "num_warps": 4, "num_stages": 2}, "1536": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 3}, "2048": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 128, "num_warps": 4, "num_stages": 2}, "3072": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 128, "num_warps": 4, "num_stages": 2}, "4096": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 128, "num_warps": 4, "num_stages": 3}}
1+
{"1": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "2": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "4": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "8": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "16": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "24": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "32": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "48": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "64": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "96": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "128": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "256": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "512": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "1024": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "1536": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "2048": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "3072": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "4096": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}}
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
{"1": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 1}, "2": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 32, "num_warps": 4, "num_stages": 1}, "4": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 3}, "8": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 128, "num_warps": 4, "num_stages": 3}, "16": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 128, "num_warps": 4, "num_stages": 3}, "24": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 3}, "32": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 128, "num_warps": 4, "num_stages": 3}, "48": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 3}, "64": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 128, "num_warps": 4, "num_stages": 3}, "96": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 3}, "128": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 3}, "256": {"BLOCK_M": 128, "BLOCK_N": 64, "BLOCK_K": 128, "GROUP_M": 128, "num_warps": 4, "num_stages": 3}, "512": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 3}, "1024": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 32, "num_warps": 4, "num_stages": 2}, "1536": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 32, "num_warps": 4, "num_stages": 2}, "2048": {"BLOCK_M": 128, "BLOCK_N": 64, "BLOCK_K": 128, "GROUP_M": 128, "num_warps": 4, "num_stages": 3}, "3072": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 128, "num_warps": 4, "num_stages": 2}, "4096": {"BLOCK_M": 128, "BLOCK_N": 64, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 2}}
1+
{"1": {"BLOCK_M": 32, "BLOCK_N": 64, "BLOCK_K": 32, "GROUP_M": 8, "num_stages": 5, "num_warps": 2}, "2": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "4": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "8": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "16": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "24": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "32": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "48": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "64": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "96": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "128": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "256": {"BLOCK_M": 128, "BLOCK_N": 64, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "512": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "1024": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "1536": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "2048": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "3072": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "4096": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}}
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
{"1": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 32, "num_warps": 4, "num_stages": 1}, "2": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 32, "num_warps": 4, "num_stages": 1}, "4": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 3}, "8": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 3}, "16": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 3}, "24": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 3}, "32": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 3}, "48": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 128, "num_warps": 4, "num_stages": 3}, "64": {"BLOCK_M": 64, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 64, "num_warps": 4, "num_stages": 3}, "96": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 128, "num_warps": 4, "num_stages": 3}, "128": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 128, "num_warps": 4, "num_stages": 3}, "256": {"BLOCK_M": 128, "BLOCK_N": 64, "BLOCK_K": 128, "GROUP_M": 32, "num_warps": 4, "num_stages": 3}, "512": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 128, "num_warps": 4, "num_stages": 3}, "1024": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 128, "num_warps": 4, "num_stages": 2}, "1536": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 128, "num_warps": 4, "num_stages": 2}, "2048": {"BLOCK_M": 128, "BLOCK_N": 64, "BLOCK_K": 128, "GROUP_M": 128, "num_warps": 4, "num_stages": 3}, "3072": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 128, "GROUP_M": 128, "num_warps": 4, "num_stages": 2}, "4096": {"BLOCK_M": 128, "BLOCK_N": 64, "BLOCK_K": 128, "GROUP_M": 32, "num_warps": 4, "num_stages": 2}}
1+
{"1": {"BLOCK_M": 32, "BLOCK_N": 64, "BLOCK_K": 32, "GROUP_M": 8, "num_stages": 5, "num_warps": 2}, "2": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "4": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "8": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "16": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "24": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "32": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "48": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "64": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "96": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "128": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "256": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "512": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "1024": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "1536": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "2048": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "3072": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}, "4096": {"BLOCK_M": 128, "BLOCK_N": 32, "BLOCK_K": 64, "GROUP_M": 8, "num_stages": 4, "num_warps": 4}}

0 commit comments

Comments (0)