From 08f637546e4a1adff1b474fe6b2ae48877aee805 Mon Sep 17 00:00:00 2001 From: David Bayer Date: Thu, 17 Oct 2024 15:57:47 +0200 Subject: [PATCH 1/2] remove unnecessary heap allocations --- .../vkFFT_API_handles/vkFFT_CompileKernel.h | 45 +++++-------------- 1 file changed, 11 insertions(+), 34 deletions(-) diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h index 37a4644..574b729 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h @@ -329,30 +329,19 @@ static inline VkFFTResult VkFFT_CompileKernel(VkFFTApplication* app, VkFFTAxis* deleteVkFFT(app); return VKFFT_ERROR_FAILED_TO_CREATE_PROGRAM; } - int numOpts = 1; + int numOpts = 0; char* opts[5]; - opts[0] = (char*)malloc(sizeof(char) * 50); - if (!opts[0]) { - free(code0); - code0 = 0; - deleteVkFFT(app); - return VKFFT_ERROR_MALLOC_FAILED; - } + char archOpt[50]; #if (CUDA_VERSION >= 11030) - sprintf(opts[0], "--gpu-architecture=sm_%" PRIu64 "%" PRIu64 "", app->configuration.computeCapabilityMajor, app->configuration.computeCapabilityMinor); + sprintf(archOpt, "--gpu-architecture=sm_%" PRIu64 "%" PRIu64 "", app->configuration.computeCapabilityMajor, app->configuration.computeCapabilityMinor); #else - sprintf(opts[0], "--gpu-architecture=compute_%" PRIu64 "%" PRIu64 "", app->configuration.computeCapabilityMajor, app->configuration.computeCapabilityMinor); + sprintf(archOpt, "--gpu-architecture=compute_%" PRIu64 "%" PRIu64 "", app->configuration.computeCapabilityMajor, app->configuration.computeCapabilityMinor); #endif + opts[numOpts++] = archOpt; + char fmadOpt[50]; if (app->configuration.quadDoubleDoublePrecision || app->configuration.quadDoubleDoublePrecisionDoubleMemory){ - opts[1] = (char*)malloc(sizeof(char) * 50); - if (!opts[1]) { - free(code0); - code0 = 0; - deleteVkFFT(app); - return VKFFT_ERROR_MALLOC_FAILED; - } - numOpts++; - sprintf(opts[1], "-fmad=false"); + sprintf(fmadOpt, "-fmad=false"); + opts[numOpts++] = fmadOpt; } //result = nvrtcAddNameExpression(prog, "&consts"); //if (result != NVRTC_SUCCESS) printf("1.5 error: %s\n", nvrtcGetErrorString(result)); @@ -360,10 +349,6 @@ static inline VkFFTResult VkFFT_CompileKernel(VkFFTApplication* app, VkFFTAxis* numOpts, // numOptions (const char* const*)opts); // options - free(opts[0]); - if (app->configuration.quadDoubleDoublePrecision || app->configuration.quadDoubleDoublePrecisionDoubleMemory) - free(opts[1]); - if (result != NVRTC_SUCCESS) { printf("nvrtcCompileProgram error: %s\n", nvrtcGetErrorString(result)); char* log = (char*)malloc(sizeof(char) * 4000000); @@ -533,24 +518,16 @@ static inline VkFFTResult VkFFT_CompileKernel(VkFFTApplication* app, VkFFTAxis* }*/ int numOpts = 0; char* opts[5]; + char ffpContractOpt[50]; if (app->configuration.quadDoubleDoublePrecision || app->configuration.quadDoubleDoublePrecisionDoubleMemory){ - opts[0] = (char*)malloc(sizeof(char) * 50); - if (!opts[0]) { - free(code0); - code0 = 0; - deleteVkFFT(app); - return VKFFT_ERROR_MALLOC_FAILED; - } - numOpts++; - sprintf(opts[0], "-ffp-contract=off"); + sprintf(ffpContractOpt, "-ffp-contract=off"); + opts[numOpts++] = ffpContractOpt; } result = hiprtcCompileProgram(prog, // prog numOpts, // numOptions (const char**)opts); // options - if (app->configuration.quadDoubleDoublePrecision || app->configuration.quadDoubleDoublePrecisionDoubleMemory) - free(opts[0]); if (result != HIPRTC_SUCCESS) { printf("hiprtcCompileProgram error: %s\n", hiprtcGetErrorString(result)); char* log = (char*)malloc(sizeof(char) * 100000); From f1fdffa21aa2b63bf23dab5e81b7a5255b3f8ec2 Mon Sep 17 00:00:00 2001 From: David Bayer Date: Thu, 17 Oct 2024 17:16:23 +0200 Subject: [PATCH 2/2] remove unnecessary rtc opts const cast --- .../vkFFT_API_handles/vkFFT_CompileKernel.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h index 574b729..e623e38 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h @@ -330,7 +330,7 @@ static inline VkFFTResult VkFFT_CompileKernel(VkFFTApplication* app, VkFFTAxis* return VKFFT_ERROR_FAILED_TO_CREATE_PROGRAM; } int numOpts = 0; - char* opts[5]; + const char* opts[5]; char archOpt[50]; #if (CUDA_VERSION >= 11030) sprintf(archOpt, "--gpu-architecture=sm_%" PRIu64 "%" PRIu64 "", app->configuration.computeCapabilityMajor, app->configuration.computeCapabilityMinor); @@ -345,9 +345,7 @@ static inline VkFFTResult VkFFT_CompileKernel(VkFFTApplication* app, VkFFTAxis* } //result = nvrtcAddNameExpression(prog, "&consts"); //if (result != NVRTC_SUCCESS) printf("1.5 error: %s\n", nvrtcGetErrorString(result)); - result = nvrtcCompileProgram(prog, // prog - numOpts, // numOptions - (const char* const*)opts); // options + result = nvrtcCompileProgram(prog, numOpts, opts); if (result != NVRTC_SUCCESS) { printf("nvrtcCompileProgram error: %s\n", nvrtcGetErrorString(result)); @@ -517,16 +515,14 @@ static inline VkFFTResult VkFFT_CompileKernel(VkFFTApplication* app, VkFFTAxis* } }*/ int numOpts = 0; - char* opts[5]; + const char* opts[5]; char ffpContractOpt[50]; if (app->configuration.quadDoubleDoublePrecision || app->configuration.quadDoubleDoublePrecisionDoubleMemory){ sprintf(ffpContractOpt, "-ffp-contract=off"); opts[numOpts++] = ffpContractOpt; } - result = hiprtcCompileProgram(prog, // prog - numOpts, // numOptions - (const char**)opts); // options + result = hiprtcCompileProgram(prog, numOpts, opts); if (result != HIPRTC_SUCCESS) { printf("hiprtcCompileProgram error: %s\n", hiprtcGetErrorString(result));