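fix(rtc): move TCNN_HALF_PRECISION definition to compiler opts

Add `-DTCNN_HALF_PRECISION=<value>` to the runtime-compilation option list
(`opts`) and drop the `#define TCNN_HALF_PRECISION ...` line from the
generated kernel source template, together with the now-unused
`TCNN_HALF_PRECISION` format argument.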

Tom94 · web-flow · commit 09ee3d6cd70f · 2025-12-14T23:53:17.000+01:00
diff --git a/src/rtc_kernel.cu b/src/rtc_kernel.cu
@@ -132,6 +132,7 @@ CudaRtcKernel::CudaRtcKernel(const std::string& name, const std::string& kernel_
 
 	std::vector<std::string> opts = {
 		fmt::format("--gpu-architecture=compute_{}", cc),
+		fmt::format("-DTCNN_HALF_PRECISION={}", TCNN_HALF_PRECISION),
 		fmt::format("-DTCNN_MIN_GPU_ARCH={}", cc),
 		"--std=c++14",
 #ifdef TCNN_RTC_USE_FAST_MATH
@@ -178,8 +179,6 @@ CudaRtcKernel::CudaRtcKernel(const std::string& name, const std::string& kernel_
 			{OPTS}
 			*/
 
-			#define TCNN_HALF_PRECISION {TCNN_HALF_PRECISION}
-
 			// NVRTC does not come with the C++ standard library out of the box and
 			// it would be troublesome to bundle it or require users to have it installed
 			// in readily available paths. So we instead include a minimal custom
@@ -192,8 +191,7 @@ CudaRtcKernel::CudaRtcKernel(const std::string& name, const std::string& kernel_
 		"KERNEL_NAME"_a = name,
 		"PREAMBLE"_a = generate_device_code_preamble(),
 		"OPTS"_a = join(opts, "\n"),
-		"KERNEL_CODE"_a = kernel_code,
-		"TCNN_HALF_PRECISION"_a = TCNN_HALF_PRECISION
+		"KERNEL_CODE"_a = kernel_code
 	);
 
 	size_t code_hash = hash_combine(0, complete_code);