@@ -160,7 +160,7 @@ void guessLocalWorkSize(ur_device_handle_t Device, size_t *ThreadsPerBlock,
160160 int MinGrid, MaxBlockSize;
161161 UR_CHECK_ERROR (cuOccupancyMaxPotentialBlockSize (
162162 &MinGrid, &MaxBlockSize, Kernel->get (), NULL , Kernel->getLocalSize (),
163- MaxBlockDim[0 ]));
163+ static_cast < int >( MaxBlockDim[0 ]) ));
164164
165165 roundToHighestFactorOfGlobalSizeIn3d (ThreadsPerBlock, GlobalSizeNormalized,
166166 MaxBlockDim, MaxBlockSize);
@@ -208,7 +208,7 @@ setKernelParams([[maybe_unused]] const ur_context_handle_t Context,
208208 MaxWorkGroupSize = Device->getMaxWorkGroupSize ();
209209
210210 if (ProvidedLocalWorkGroupSize) {
211- auto IsValid = [&](int Dim) {
211+ auto IsValid = [&](size_t Dim) {
212212 if (ReqdThreadsPerBlock[Dim] != 0 &&
213213 LocalWorkSize[Dim] != ReqdThreadsPerBlock[Dim])
214214 return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
@@ -217,7 +217,8 @@ setKernelParams([[maybe_unused]] const ur_context_handle_t Context,
217217 LocalWorkSize[Dim] > MaxThreadsPerBlock[Dim])
218218 return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
219219
220- if (LocalWorkSize[Dim] > Device->getMaxWorkItemSizes (Dim))
220+ if (LocalWorkSize[Dim] >
221+ Device->getMaxWorkItemSizes (static_cast <int >(Dim)))
221222 return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
222223 // Checks that local work sizes are a divisor of the global work sizes
223224 // which includes that the local work sizes are neither larger than
@@ -481,9 +482,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
481482
482483 auto &ArgIndices = hKernel->getArgIndices ();
483484 UR_CHECK_ERROR (cuLaunchKernel (
484- CuFunc, BlocksPerGrid[0 ], BlocksPerGrid[1 ], BlocksPerGrid[2 ],
485- ThreadsPerBlock[0 ], ThreadsPerBlock[1 ], ThreadsPerBlock[2 ], LocalSize,
486- CuStream, const_cast <void **>(ArgIndices.data ()), nullptr ));
485+ CuFunc, static_cast <unsigned int >(BlocksPerGrid[0 ]),
486+ static_cast <unsigned int >(BlocksPerGrid[1 ]),
487+ static_cast <unsigned int >(BlocksPerGrid[2 ]),
488+ static_cast <unsigned int >(ThreadsPerBlock[0 ]),
489+ static_cast <unsigned int >(ThreadsPerBlock[1 ]),
490+ static_cast <unsigned int >(ThreadsPerBlock[2 ]), LocalSize, CuStream,
491+ const_cast <void **>(ArgIndices.data ()), nullptr ));
487492
488493 if (LocalSize != 0 )
489494 hKernel->clearLocalSize ();
@@ -649,12 +654,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp(
649654 auto &ArgIndices = hKernel->getArgIndices ();
650655
651656 CUlaunchConfig launch_config;
652- launch_config.gridDimX = BlocksPerGrid[0 ];
653- launch_config.gridDimY = BlocksPerGrid[1 ];
654- launch_config.gridDimZ = BlocksPerGrid[2 ];
655- launch_config.blockDimX = ThreadsPerBlock[0 ];
656- launch_config.blockDimY = ThreadsPerBlock[1 ];
657- launch_config.blockDimZ = ThreadsPerBlock[2 ];
657+ launch_config.gridDimX = static_cast < unsigned int >( BlocksPerGrid[0 ]) ;
658+ launch_config.gridDimY = static_cast < unsigned int >( BlocksPerGrid[1 ]) ;
659+ launch_config.gridDimZ = static_cast < unsigned int >( BlocksPerGrid[2 ]) ;
660+ launch_config.blockDimX = static_cast < unsigned int >( ThreadsPerBlock[0 ]) ;
661+ launch_config.blockDimY = static_cast < unsigned int >( ThreadsPerBlock[1 ]) ;
662+ launch_config.blockDimZ = static_cast < unsigned int >( ThreadsPerBlock[2 ]) ;
658663
659664 launch_config.sharedMemBytes = LocalSize;
660665 launch_config.hStream = CuStream;
@@ -979,8 +984,9 @@ ur_result_t commonMemSetLargePattern(CUstream Stream, uint32_t PatternSize,
979984 auto OffsetPtr = Ptr + (step * sizeof (uint8_t ));
980985
981986 // set all of the pattern chunks
982- UR_CHECK_ERROR (cuMemsetD2D8Async (OffsetPtr, Pitch, Value, sizeof (uint8_t ),
983- Height, Stream));
987+ UR_CHECK_ERROR (cuMemsetD2D8Async (OffsetPtr, Pitch,
988+ static_cast <unsigned char >(Value),
989+ sizeof (uint8_t ), Height, Stream));
984990 }
985991 return UR_RESULT_SUCCESS;
986992}
@@ -1031,8 +1037,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill(
10311037 break ;
10321038 }
10331039 default : {
1034- UR_CHECK_ERROR (commonMemSetLargePattern (Stream, patternSize, size,
1035- pPattern, DstDevice));
1040+ UR_CHECK_ERROR (
1041+ commonMemSetLargePattern (Stream, static_cast <uint32_t >(patternSize),
1042+ size, pPattern, DstDevice));
10361043 break ;
10371044 }
10381045 }
@@ -1064,7 +1071,6 @@ static size_t imageElementByteSize(CUDA_ARRAY_DESCRIPTOR ArrayDesc) {
10641071 return 4 ;
10651072 default :
10661073 detail::ur::die (" Invalid image format." );
1067- return 0 ;
10681074 }
10691075}
10701076
@@ -1168,7 +1174,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead(
11681174 CUDA_ARRAY_DESCRIPTOR ArrayDesc;
11691175 UR_CHECK_ERROR (cuArrayGetDescriptor (&ArrayDesc, Array));
11701176
1171- int ElementByteSize = imageElementByteSize (ArrayDesc);
1177+ int ElementByteSize = static_cast < int >( imageElementByteSize (ArrayDesc) );
11721178
11731179 size_t ByteOffsetX = origin.x * ElementByteSize * ArrayDesc.NumChannels ;
11741180 size_t BytesToCopy = ElementByteSize * ArrayDesc.NumChannels * region.width ;
@@ -1241,7 +1247,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite(
12411247 CUDA_ARRAY_DESCRIPTOR ArrayDesc;
12421248 UR_CHECK_ERROR (cuArrayGetDescriptor (&ArrayDesc, Array));
12431249
1244- int ElementByteSize = imageElementByteSize (ArrayDesc);
1250+ int ElementByteSize = static_cast < int >( imageElementByteSize (ArrayDesc) );
12451251
12461252 size_t ByteOffsetX = origin.x * ElementByteSize * ArrayDesc.NumChannels ;
12471253 size_t BytesToCopy = ElementByteSize * ArrayDesc.NumChannels * region.width ;
@@ -1320,7 +1326,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy(
13201326 UR_ASSERT (SrcArrayDesc.NumChannels == DstArrayDesc.NumChannels ,
13211327 UR_RESULT_ERROR_INVALID_MEM_OBJECT);
13221328
1323- int ElementByteSize = imageElementByteSize (SrcArrayDesc);
1329+ int ElementByteSize = static_cast < int >( imageElementByteSize (SrcArrayDesc) );
13241330
13251331 size_t DstByteOffsetX =
13261332 dstOrigin.x * ElementByteSize * SrcArrayDesc.NumChannels ;
@@ -1505,8 +1511,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill(
15051511 CuStream));
15061512 break ;
15071513 default :
1508- commonMemSetLargePattern (CuStream, patternSize, size, pPattern ,
1509- (CUdeviceptr)ptr);
1514+ commonMemSetLargePattern (CuStream, static_cast < uint32_t >( patternSize) ,
1515+ size, pPattern, (CUdeviceptr)ptr);
15101516 break ;
15111517 }
15121518 if (phEvent) {
0 commit comments