Skip to content

Commit fdc2ad6

Browse files
committed
[SYCL][NVPTX] Enable -fcuda-short-ptr by default
This makes the compiler treat pointers to the CUDA shared, const, and local address spaces as 32-bit pointers. This should bring decent performance improvements in certain programs.
1 parent 1d58d6b commit fdc2ad6

File tree

3 files changed

+14
-7
lines changed

3 files changed

+14
-7
lines changed

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8537,9 +8537,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
85378537
}
85388538
}
85398539

8540-
if (IsCuda) {
8540+
if (IsCuda || (IsSYCLDevice && Triple.isNVPTX())) {
8541+
bool UseShortPtr = IsSYCLDevice && Triple.isNVPTX();
85418542
if (Args.hasFlag(options::OPT_fcuda_short_ptr,
8542-
options::OPT_fno_cuda_short_ptr, false))
8543+
options::OPT_fno_cuda_short_ptr, UseShortPtr))
85438544
CmdArgs.push_back("-fcuda-short-ptr");
85448545
}
85458546

clang/lib/Driver/ToolChains/Cuda.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1010,6 +1010,11 @@ void CudaToolChain::addClangTargetOptions(
10101010
CC1Args.push_back(DriverArgs.MakeArgString(LibSpirvFile));
10111011
}
10121012

1013+
bool UseShortPtr = DeviceOffloadingKind == Action::OFK_SYCL;
1014+
if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr,
1015+
options::OPT_fno_cuda_short_ptr, UseShortPtr))
1016+
CC1Args.append({"-mllvm", "--nvptx-short-ptr"});
1017+
10131018
if (DriverArgs.hasArg(options::OPT_nogpulib))
10141019
return;
10151020

@@ -1035,10 +1040,6 @@ void CudaToolChain::addClangTargetOptions(
10351040

10361041
clang::CudaVersion CudaInstallationVersion = CudaInstallation.version();
10371042

1038-
if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr,
1039-
options::OPT_fno_cuda_short_ptr, false))
1040-
CC1Args.append({"-mllvm", "--nvptx-short-ptr"});
1041-
10421043
if (CudaInstallationVersion >= CudaVersion::UNKNOWN)
10431044
CC1Args.push_back(
10441045
DriverArgs.MakeArgString(Twine("-target-sdk-version=") +

libclc/CMakeLists.txt

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -450,10 +450,15 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
450450
list(APPEND flags -D__unix__)
451451
endif()
452452

453+
set(spirv_flags ${flags})
454+
if( ARCH STREQUAL nvptx OR ARCH STREQUAL nvptx64 )
455+
list(APPEND spirv_flags -Xclang -fcuda-short-ptr -mllvm -nvptx-short-ptr)
456+
endif()
457+
453458
add_libclc_builtin_set(libspirv-${arch_suffix}
454459
TRIPLE ${clang_triple}
455460
TARGET_ENV libspirv
456-
COMPILE_OPT ${flags}
461+
COMPILE_OPT ${spirv_flags}
457462
OPT_FLAGS ${opt_flags}
458463
FILES ${libspirv_files}
459464
GEN_FILES ${libspirv_gen_files}

0 commit comments

Comments
 (0)