Skip to content

Commit 845d83a

Browse files
committed
Relax performance tests for GPUs.
1 parent 58bf523 commit 845d83a

File tree

1 file changed

+6 -2 lines changed

1 file changed

+6 -2 lines changed

test/performance/fast_function_approximations.cpp

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -81,7 +81,7 @@ int main(int argc, char **argv) {
 81  81    -1.0f, 1.0f,
 82  82    [](Expr x, Expr y, Expr z) { return Halide::tan(x + z); },
 83  83    [](Expr x, Expr y, Expr z, Halide::ApproximationPrecision prec) { return Halide::fast_tan(x + z, prec); },
 84      - {Target::Feature::WebGPU, Target::Feature::Metal},
     84  + {Target::Feature::WebGPU, Target::Feature::Metal, Target::Feature::Vulkan},
 85  85    },
 86  86    {
 87  87    "atan",
@@ -181,7 +181,7 @@ int main(int argc, char **argv) {
181 181    f.never_partition_all();
182 182    f.gpu_tile(x, y, xo, yo, xi, yi, 64, 16, TailStrategy::ShiftInwards).vectorize(xi, 4);
183 183    } else {
184      - f.vectorize(x, 8);
    184  + f.vectorize(x, target.natural_vector_size<float>());
185 185    }
186 186    };
187 187    Buffer<float> buffer_out(test_w, test_h);
@@ -249,6 +249,10 @@ int main(int argc, char **argv) {
249 249    should_be_faster = false;
250 250    }
251 251    }
    252  + } else {
    253  + if (target.has_gpu_feature() && precision.precision.optimized_for != ApproximationPrecision::AUTO) {
    254  + should_be_faster = false;
    255  + }
252 256    }
253 257    if (should_be_faster) num_tests++;
254 258

0 commit comments

Comments
 (0)