Skip to content

Commit e214245

Browse files
committed
Raise MSVC warning level from /W3 to /W4
This reinstates the changes from oneapi-src#2100, which were reverted in oneapi-src#2302 due to regressions in intel/llvm testing.
1 parent cfe5912 commit e214245

File tree

71 files changed

+292
-237
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

71 files changed

+292
-237
lines changed

cmake/helpers.cmake

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -108,18 +108,25 @@ function(add_ur_target_compile_options name)
108108
elseif(MSVC)
109109
target_compile_options(${name} PRIVATE
110110
$<$<CXX_COMPILER_ID:MSVC>:/MP> # clang-cl.exe does not support /MP
111-
/W3
111+
/W4
112+
/wd4456 # Disable: declaration of 'identifier' hides previous local declaration
113+
/wd4457 # Disable: declaration of 'identifier' hides function parameter
114+
/wd4458 # Disable: declaration of 'identifier' hides class member
115+
/wd4459 # Disable: declaration of 'identifier' hides global declaration
112116
/MD$<$<CONFIG:Debug>:d>
113117
/GS
114118
/DWIN32_LEAN_AND_MEAN
115119
/DNOMINMAX
116120
)
117121

118-
if(UR_DEVELOPER_MODE)
122+
target_compile_definitions(${name} PRIVATE
119123
# _CRT_SECURE_NO_WARNINGS used mainly because of getenv
120-
# C4267: The compiler detected a conversion from size_t to a smaller type.
124+
_CRT_SECURE_NO_WARNINGS
125+
)
126+
127+
if(UR_DEVELOPER_MODE)
121128
target_compile_options(${name} PRIVATE
122-
/WX /GS /D_CRT_SECURE_NO_WARNINGS /wd4267
129+
/WX /GS
123130
)
124131
endif()
125132
endif()

examples/collector/collector.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,14 @@
2525
#include <string_view>
2626

2727
#include "ur_api.h"
28+
29+
#ifdef _MSC_VER
30+
#pragma warning(disable : 4245)
31+
#endif
2832
#include "xpti/xpti_trace_framework.h"
33+
#ifdef _MSC_VER
34+
#pragma warning(default : 4245)
35+
#endif
2936

3037
constexpr uint16_t TRACE_FN_BEGIN =
3138
static_cast<uint16_t>(xpti::trace_point_type_t::function_with_args_begin);

include/ur_api.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -426,7 +426,7 @@ typedef struct ur_physical_mem_handle_t_ *ur_physical_mem_handle_t;
426426
///////////////////////////////////////////////////////////////////////////////
427427
#ifndef UR_BIT
428428
/// @brief Generic macro for enumerator bit masks
429-
#define UR_BIT(_i) (1 << _i)
429+
#define UR_BIT(_i) (1U << _i)
430430
#endif // UR_BIT
431431

432432
///////////////////////////////////////////////////////////////////////////////

scripts/core/common.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ name: "$x_physical_mem_handle_t"
134134
type: macro
135135
desc: "Generic macro for enumerator bit masks"
136136
name: "$X_BIT( _i )"
137-
value: "( 1 << _i )"
137+
value: "( 1U << _i )"
138138
--- #--------------------------------------------------------------------------
139139
type: enum
140140
desc: "Defines Return/Error codes"

source/adapters/cuda/CMakeLists.txt

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,15 +97,16 @@ if (UR_ENABLE_TRACING)
9797
get_target_property(XPTI_SRC_DIR xpti SOURCE_DIR)
9898
set(XPTI_PROXY_SRC "${XPTI_SRC_DIR}/xpti_proxy.cpp")
9999
endif()
100-
target_compile_definitions(${TARGET_NAME} PRIVATE
100+
add_library(cuda-xpti-proxy STATIC ${XPTI_PROXY_SRC})
101+
target_compile_definitions(cuda-xpti-proxy PRIVATE
101102
XPTI_ENABLE_INSTRUMENTATION
102103
XPTI_STATIC_LIBRARY
103104
)
104-
target_include_directories(${TARGET_NAME} PRIVATE
105+
target_include_directories(cuda-xpti-proxy PRIVATE
105106
${XPTI_INCLUDES}
106107
${CUDA_CUPTI_INCLUDE_DIR}
107108
)
108-
target_sources(${TARGET_NAME} PRIVATE ${XPTI_PROXY_SRC})
109+
target_link_libraries(${TARGET_NAME} PRIVATE cuda-xpti-proxy)
109110
endif()
110111

111112
if (CUDA_cupti_LIBRARY)

source/adapters/cuda/command_buffer.cpp

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ static ur_result_t enqueueCommandBufferFillHelper(
242242
if ((PatternSize == 1) || (PatternSize == 2) || (PatternSize == 4)) {
243243
CUDA_MEMSET_NODE_PARAMS NodeParams = {};
244244
NodeParams.dst = DstPtr;
245-
NodeParams.elementSize = PatternSize;
245+
NodeParams.elementSize = static_cast<unsigned int>(PatternSize);
246246
NodeParams.height = N;
247247
NodeParams.pitch = PatternSize;
248248
NodeParams.width = 1;
@@ -508,12 +508,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
508508
auto &ArgIndices = hKernel->getArgIndices();
509509
CUDA_KERNEL_NODE_PARAMS NodeParams = {};
510510
NodeParams.func = CuFunc;
511-
NodeParams.gridDimX = BlocksPerGrid[0];
512-
NodeParams.gridDimY = BlocksPerGrid[1];
513-
NodeParams.gridDimZ = BlocksPerGrid[2];
514-
NodeParams.blockDimX = ThreadsPerBlock[0];
515-
NodeParams.blockDimY = ThreadsPerBlock[1];
516-
NodeParams.blockDimZ = ThreadsPerBlock[2];
511+
NodeParams.gridDimX = static_cast<unsigned int>(BlocksPerGrid[0]);
512+
NodeParams.gridDimY = static_cast<unsigned int>(BlocksPerGrid[1]);
513+
NodeParams.gridDimZ = static_cast<unsigned int>(BlocksPerGrid[2]);
514+
NodeParams.blockDimX = static_cast<unsigned int>(ThreadsPerBlock[0]);
515+
NodeParams.blockDimY = static_cast<unsigned int>(ThreadsPerBlock[1]);
516+
NodeParams.blockDimZ = static_cast<unsigned int>(ThreadsPerBlock[2]);
517517
NodeParams.sharedMemBytes = LocalSize;
518518
NodeParams.kernelParams = const_cast<void **>(ArgIndices.data());
519519

@@ -1397,12 +1397,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
13971397
CUDA_KERNEL_NODE_PARAMS &Params = KernelCommandHandle->Params;
13981398

13991399
Params.func = CuFunc;
1400-
Params.gridDimX = BlocksPerGrid[0];
1401-
Params.gridDimY = BlocksPerGrid[1];
1402-
Params.gridDimZ = BlocksPerGrid[2];
1403-
Params.blockDimX = ThreadsPerBlock[0];
1404-
Params.blockDimY = ThreadsPerBlock[1];
1405-
Params.blockDimZ = ThreadsPerBlock[2];
1400+
Params.gridDimX = static_cast<unsigned int>(BlocksPerGrid[0]);
1401+
Params.gridDimY = static_cast<unsigned int>(BlocksPerGrid[1]);
1402+
Params.gridDimZ = static_cast<unsigned int>(BlocksPerGrid[2]);
1403+
Params.blockDimX = static_cast<unsigned int>(ThreadsPerBlock[0]);
1404+
Params.blockDimY = static_cast<unsigned int>(ThreadsPerBlock[1]);
1405+
Params.blockDimZ = static_cast<unsigned int>(ThreadsPerBlock[2]);
14061406
Params.sharedMemBytes = KernelCommandHandle->Kernel->getLocalSize();
14071407
Params.kernelParams =
14081408
const_cast<void **>(KernelCommandHandle->Kernel->getArgIndices().data());

source/adapters/cuda/device.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1153,7 +1153,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform,
11531153

11541154
try {
11551155
if (pNumDevices) {
1156-
*pNumDevices = NumDevices;
1156+
*pNumDevices = static_cast<uint32_t>(NumDevices);
11571157
}
11581158

11591159
if (ReturnDevices && phDevices) {
@@ -1236,7 +1236,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle(
12361236
ur_result_t UR_APICALL urDeviceGetGlobalTimestamps(ur_device_handle_t hDevice,
12371237
uint64_t *pDeviceTimestamp,
12381238
uint64_t *pHostTimestamp) {
1239-
CUevent Event;
1239+
CUevent Event{};
12401240
ScopedContext Active(hDevice);
12411241

12421242
if (pDeviceTimestamp) {

source/adapters/cuda/enqueue.cpp

Lines changed: 25 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ void guessLocalWorkSize(ur_device_handle_t Device, size_t *ThreadsPerBlock,
160160
int MinGrid, MaxBlockSize;
161161
UR_CHECK_ERROR(cuOccupancyMaxPotentialBlockSize(
162162
&MinGrid, &MaxBlockSize, Kernel->get(), NULL, Kernel->getLocalSize(),
163-
MaxBlockDim[0]));
163+
static_cast<int>(MaxBlockDim[0])));
164164

165165
roundToHighestFactorOfGlobalSizeIn3d(ThreadsPerBlock, GlobalSizeNormalized,
166166
MaxBlockDim, MaxBlockSize);
@@ -208,7 +208,7 @@ setKernelParams([[maybe_unused]] const ur_context_handle_t Context,
208208
MaxWorkGroupSize = Device->getMaxWorkGroupSize();
209209

210210
if (ProvidedLocalWorkGroupSize) {
211-
auto IsValid = [&](int Dim) {
211+
auto IsValid = [&](size_t Dim) {
212212
if (ReqdThreadsPerBlock[Dim] != 0 &&
213213
LocalWorkSize[Dim] != ReqdThreadsPerBlock[Dim])
214214
return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
@@ -217,7 +217,8 @@ setKernelParams([[maybe_unused]] const ur_context_handle_t Context,
217217
LocalWorkSize[Dim] > MaxThreadsPerBlock[Dim])
218218
return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
219219

220-
if (LocalWorkSize[Dim] > Device->getMaxWorkItemSizes(Dim))
220+
if (LocalWorkSize[Dim] >
221+
Device->getMaxWorkItemSizes(static_cast<int>(Dim)))
221222
return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
222223
// Checks that local work sizes are a divisor of the global work sizes
223224
// which includes that the local work sizes are neither larger than
@@ -489,9 +490,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
489490

490491
auto &ArgIndices = hKernel->getArgIndices();
491492
UR_CHECK_ERROR(cuLaunchKernel(
492-
CuFunc, BlocksPerGrid[0], BlocksPerGrid[1], BlocksPerGrid[2],
493-
ThreadsPerBlock[0], ThreadsPerBlock[1], ThreadsPerBlock[2], LocalSize,
494-
CuStream, const_cast<void **>(ArgIndices.data()), nullptr));
493+
CuFunc, static_cast<unsigned int>(BlocksPerGrid[0]),
494+
static_cast<unsigned int>(BlocksPerGrid[1]),
495+
static_cast<unsigned int>(BlocksPerGrid[2]),
496+
static_cast<unsigned int>(ThreadsPerBlock[0]),
497+
static_cast<unsigned int>(ThreadsPerBlock[1]),
498+
static_cast<unsigned int>(ThreadsPerBlock[2]), LocalSize, CuStream,
499+
const_cast<void **>(ArgIndices.data()), nullptr));
495500

496501
if (LocalSize != 0)
497502
hKernel->clearLocalSize();
@@ -657,12 +662,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp(
657662
auto &ArgIndices = hKernel->getArgIndices();
658663

659664
CUlaunchConfig launch_config;
660-
launch_config.gridDimX = BlocksPerGrid[0];
661-
launch_config.gridDimY = BlocksPerGrid[1];
662-
launch_config.gridDimZ = BlocksPerGrid[2];
663-
launch_config.blockDimX = ThreadsPerBlock[0];
664-
launch_config.blockDimY = ThreadsPerBlock[1];
665-
launch_config.blockDimZ = ThreadsPerBlock[2];
665+
launch_config.gridDimX = static_cast<unsigned int>(BlocksPerGrid[0]);
666+
launch_config.gridDimY = static_cast<unsigned int>(BlocksPerGrid[1]);
667+
launch_config.gridDimZ = static_cast<unsigned int>(BlocksPerGrid[2]);
668+
launch_config.blockDimX = static_cast<unsigned int>(ThreadsPerBlock[0]);
669+
launch_config.blockDimY = static_cast<unsigned int>(ThreadsPerBlock[1]);
670+
launch_config.blockDimZ = static_cast<unsigned int>(ThreadsPerBlock[2]);
666671

667672
launch_config.sharedMemBytes = LocalSize;
668673
launch_config.hStream = CuStream;
@@ -1075,8 +1080,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill(
10751080
break;
10761081
}
10771082
default: {
1078-
UR_CHECK_ERROR(commonMemSetLargePattern(Stream, patternSize, size,
1079-
pPattern, DstDevice));
1083+
UR_CHECK_ERROR(
1084+
commonMemSetLargePattern(Stream, static_cast<uint32_t>(patternSize),
1085+
size, pPattern, DstDevice));
10801086
break;
10811087
}
10821088
}
@@ -1108,7 +1114,6 @@ static size_t imageElementByteSize(CUDA_ARRAY_DESCRIPTOR ArrayDesc) {
11081114
return 4;
11091115
default:
11101116
detail::ur::die("Invalid image format.");
1111-
return 0;
11121117
}
11131118
}
11141119

@@ -1212,7 +1217,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead(
12121217
CUDA_ARRAY_DESCRIPTOR ArrayDesc;
12131218
UR_CHECK_ERROR(cuArrayGetDescriptor(&ArrayDesc, Array));
12141219

1215-
int ElementByteSize = imageElementByteSize(ArrayDesc);
1220+
int ElementByteSize = static_cast<int>(imageElementByteSize(ArrayDesc));
12161221

12171222
size_t ByteOffsetX = origin.x * ElementByteSize * ArrayDesc.NumChannels;
12181223
size_t BytesToCopy = ElementByteSize * ArrayDesc.NumChannels * region.width;
@@ -1285,7 +1290,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite(
12851290
CUDA_ARRAY_DESCRIPTOR ArrayDesc;
12861291
UR_CHECK_ERROR(cuArrayGetDescriptor(&ArrayDesc, Array));
12871292

1288-
int ElementByteSize = imageElementByteSize(ArrayDesc);
1293+
int ElementByteSize = static_cast<int>(imageElementByteSize(ArrayDesc));
12891294

12901295
size_t ByteOffsetX = origin.x * ElementByteSize * ArrayDesc.NumChannels;
12911296
size_t BytesToCopy = ElementByteSize * ArrayDesc.NumChannels * region.width;
@@ -1364,7 +1369,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy(
13641369
UR_ASSERT(SrcArrayDesc.NumChannels == DstArrayDesc.NumChannels,
13651370
UR_RESULT_ERROR_INVALID_MEM_OBJECT);
13661371

1367-
int ElementByteSize = imageElementByteSize(SrcArrayDesc);
1372+
int ElementByteSize = static_cast<int>(imageElementByteSize(SrcArrayDesc));
13681373

13691374
size_t DstByteOffsetX =
13701375
dstOrigin.x * ElementByteSize * SrcArrayDesc.NumChannels;
@@ -1549,8 +1554,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill(
15491554
CuStream));
15501555
break;
15511556
default:
1552-
commonMemSetLargePattern(CuStream, patternSize, size, pPattern,
1553-
(CUdeviceptr)ptr);
1557+
commonMemSetLargePattern(CuStream, static_cast<uint32_t>(patternSize),
1558+
size, pPattern, (CUdeviceptr)ptr);
15541559
break;
15551560
}
15561561
if (phEvent) {

source/adapters/cuda/image.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -284,8 +284,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPitchedAllocExp(
284284
ur_result_t Result = UR_RESULT_SUCCESS;
285285
try {
286286
ScopedContext Active(hDevice);
287-
UR_CHECK_ERROR(cuMemAllocPitch((CUdeviceptr *)ppMem, pResultPitch,
288-
widthInBytes, height, elementSizeBytes));
287+
UR_CHECK_ERROR(
288+
cuMemAllocPitch((CUdeviceptr *)ppMem, pResultPitch, widthInBytes,
289+
height, static_cast<unsigned int>(elementSizeBytes)));
289290
} catch (ur_result_t error) {
290291
Result = error;
291292
} catch (...) {

source/adapters/cuda/kernel.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -203,8 +203,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
203203

204204
int MaxNumActiveGroupsPerCU{0};
205205
UR_CHECK_ERROR(cuOccupancyMaxActiveBlocksPerMultiprocessor(
206-
&MaxNumActiveGroupsPerCU, hKernel->get(), localWorkSize,
207-
dynamicSharedMemorySize));
206+
&MaxNumActiveGroupsPerCU, hKernel->get(),
207+
static_cast<int>(localWorkSize), dynamicSharedMemorySize));
208208
detail::ur::assertion(MaxNumActiveGroupsPerCU >= 0);
209209
// Handle the case where we can't have all SMs active with at least 1 group
210210
// per SM. In that case, the device is still able to run 1 work-group, hence

0 commit comments

Comments
 (0)