Skip to content

Commit 18c1c5b

Browse files
authored
[ET-VK][ez] Address regressed conv2d perf numbers on main (#16084)
Address the benchmark binaries reporting worse performance than one month ago. This was not a "real" regression; it was caused by changes made to the benchmark binaries during debugging that were not reverted before landing:

1. The benchmarks ran only 1 iteration, without any warmup iterations.
2. The quantize/dequantize shaders are normally excluded from the overall execution time / FLOPS calculation, but these shaders were recently renamed and the logic that filters them out when reporting time was not updated accordingly.

Also includes a small fix to the input data loading logic.

Differential Revision: [D88381899](https://our.internmc.facebook.com/intern/diff/D88381899/)
1 parent e74f1d3 commit 18c1c5b

File tree

6 files changed

+11
-10
lines changed

6 files changed

+11
-10
lines changed

backends/vulkan/runtime/graph/ops/glsl/conv2d_fp_input_tile_load.glslh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ void load_fp_input_tile(
3939
[[unroll]] for (int w = 0; w < TILE_M; w++) {
4040
if (load_tidx.data.x < input_sizes.x) {
4141
tile.data[w][0] = load_fp_input_texel(load_tidx);
42+
} else {
43+
tile.data[w][0] = VEC4_T(0);
4244
}
4345
load_tidx.data.x++;
4446
}

backends/vulkan/test/custom_ops/q4gsw_linear.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -552,7 +552,7 @@ int main(int argc, char* argv[]) {
552552
generate_quantized_linear_test_cases,
553553
quantized_linear_flop_calculator,
554554
"QuantizedLinearQ4GSW",
555-
10,
555+
3,
556556
10,
557557
ref_fn);
558558

backends/vulkan/test/custom_ops/q8csw_linear.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -471,7 +471,7 @@ int main(int argc, char* argv[]) {
471471
generate_quantized_linear_test_cases,
472472
quantized_linear_flop_calculator,
473473
"QuantizedLinear",
474-
0,
474+
3,
475475
10,
476476
ref_fn);
477477

backends/vulkan/test/custom_ops/q8ta_q8csw_q8to_conv2d.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -626,8 +626,8 @@ int main(int argc, char* argv[]) {
626626
generate_quantized_conv2d_test_cases,
627627
quantized_conv2d_flop_calculator,
628628
"QuantizedConv2dQ8ToQ8To",
629-
0,
630-
1,
629+
3,
630+
10,
631631
ref_fn);
632632

633633
return 0;

backends/vulkan/test/custom_ops/q8ta_q8csw_q8to_conv2d_dw.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -584,8 +584,8 @@ int main(int argc, char* argv[]) {
584584
generate_quantized_conv2d_dw_test_cases,
585585
quantized_conv2d_dw_flop_calculator,
586586
"QuantizedDepthwiseInt8Conv2d",
587-
0,
588-
1,
587+
3,
588+
10,
589589
ref_fn);
590590

591591
return 0;

backends/vulkan/test/custom_ops/utils.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -662,10 +662,9 @@ float collect_gpu_timing_us(ComputeGraph& graph) {
662662
for (const auto& shader_result : results) {
663663
if (shader_result.kernel_name.find("nchw_to") == std::string::npos &&
664664
shader_result.kernel_name.find("to_nchw") == std::string::npos &&
665-
shader_result.kernel_name.find(
666-
"quantize_and_pack_q8ta_conv2d_input") == std::string::npos &&
667-
shader_result.kernel_name.find(
668-
"unpack_and_dequantize_q8ta_conv2d_output") ==
665+
shader_result.kernel_name.find("quantize_and_pack_4w4c") ==
666+
std::string::npos &&
667+
shader_result.kernel_name.find("unpack_4w4c_and_dequantize") ==
669668
std::string::npos) {
670669
// Calculate duration from start and end times, convert from ns to μs
671670
uint64_t duration_ns =

0 commit comments

Comments
 (0)