Skip to content

Commit 81e4b19

Browse files
committed
cuda_runtime: use fast math
1 parent 4d5a076 commit 81e4b19

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

src/runtime/cuda/cuda_runtime_program.c

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,8 @@ static bool emit_cuda_c_code(CudaKernel* spec) {
3434
static bool cuda_c_to_ptx(CudaKernel* kernel) {
3535
nvrtcProgram program;
3636
CHECK_NVRTC(nvrtcCreateProgram(&program, kernel->cuda_code, kernel->key.entry_point, 0, NULL, NULL), return false);
37-
nvrtcResult compile_result = nvrtcCompileProgram(program, 0, false);
37+
const char* args[] = { "--use_fast_math" };
38+
nvrtcResult compile_result = nvrtcCompileProgram(program, sizeof(args) / sizeof(*args), args);
3839
if (compile_result != NVRTC_SUCCESS) {
3940
error_print("NVRTC compilation failed: %s\n", nvrtcGetErrorString(compile_result));
4041
debug_print("Dumping source:\n%s", kernel->cuda_code);

0 commit comments

Comments
 (0)