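Relax performance tests for GPUs.

Changes to test/performance/fast_function_approximations.cpp:
- Add Target::Feature::Vulkan to the target-feature list of the fast_tan test entry (next to WebGPU and Metal).
- In the non-gpu_tile schedule branch, vectorize x by target.natural_vector_size<float>() instead of a fixed width of 8.
- In a new else branch, set should_be_faster to false when the target has a GPU feature and the precision is optimized for anything other than ApproximationPrecision::AUTO.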

mcourteaux · mcourteaux · commit 845d83a8f2ec · 2025-06-14T13:57:54.000+02:00
diff --git a/test/performance/fast_function_approximations.cpp b/test/performance/fast_function_approximations.cpp
@@ -81,7 +81,7 @@ int main(int argc, char **argv) {
             -1.0f, 1.0f,
             [](Expr x, Expr y, Expr z) { return Halide::tan(x + z); },
             [](Expr x, Expr y, Expr z, Halide::ApproximationPrecision prec) { return Halide::fast_tan(x + z, prec); },
-            {Target::Feature::WebGPU, Target::Feature::Metal},
+            {Target::Feature::WebGPU, Target::Feature::Metal, Target::Feature::Vulkan},
         },
         {
             "atan",
@@ -181,7 +181,7 @@ int main(int argc, char **argv) {
             f.never_partition_all();
             f.gpu_tile(x, y, xo, yo, xi, yi, 64, 16, TailStrategy::ShiftInwards).vectorize(xi, 4);
         } else {
-            f.vectorize(x, 8);
+            f.vectorize(x, target.natural_vector_size<float>());
         }
     };
     Buffer<float> buffer_out(test_w, test_h);
@@ -249,6 +249,10 @@ int main(int argc, char **argv) {
                         should_be_faster = false;
                     }
                 }
+            } else {
+                if (target.has_gpu_feature() && precision.precision.optimized_for != ApproximationPrecision::AUTO) {
+                    should_be_faster = false;
+                }
             }
             if (should_be_faster) num_tests++;
 

Original file line number	Diff line number	Diff line change
`@@ -81,7 +81,7 @@ int main(int argc, char **argv) {`
`81`	`81`	`-1.0f, 1.0f,`
`82`	`82`	`[](Expr x, Expr y, Expr z) { return Halide::tan(x + z); },`
`83`	`83`	`[](Expr x, Expr y, Expr z, Halide::ApproximationPrecision prec) { return Halide::fast_tan(x + z, prec); },`
`84`		`- {Target::Feature::WebGPU, Target::Feature::Metal},`
	`84`	`+ {Target::Feature::WebGPU, Target::Feature::Metal, Target::Feature::Vulkan},`
`85`	`85`	`},`
`86`	`86`	`{`
`87`	`87`	`"atan",`
`@@ -181,7 +181,7 @@ int main(int argc, char **argv) {`
`181`	`181`	`f.never_partition_all();`
`182`	`182`	`f.gpu_tile(x, y, xo, yo, xi, yi, 64, 16, TailStrategy::ShiftInwards).vectorize(xi, 4);`
`183`	`183`	`} else {`
`184`		`- f.vectorize(x, 8);`
	`184`	`+ f.vectorize(x, target.natural_vector_size<float>());`
`185`	`185`	`}`
`186`	`186`	`};`
`187`	`187`	`Buffer<float> buffer_out(test_w, test_h);`
`@@ -249,6 +249,10 @@ int main(int argc, char **argv) {`
`249`	`249`	`should_be_faster = false;`
`250`	`250`	`}`
`251`	`251`	`}`
	`252`	`+ } else {`
	`253`	`+ if (target.has_gpu_feature() && precision.precision.optimized_for != ApproximationPrecision::AUTO) {`
	`254`	`+ should_be_faster = false;`
	`255`	`+ }`
`252`	`256`	`}`
`253`	`257`	`if (should_be_faster) num_tests++;`
`254`	`258`