@@ -160,7 +160,7 @@ void guessLocalWorkSize(ur_device_handle_t Device, size_t *ThreadsPerBlock,
160160 int MinGrid, MaxBlockSize;
161161 UR_CHECK_ERROR (cuOccupancyMaxPotentialBlockSize (
162162 &MinGrid, &MaxBlockSize, Kernel->get (), NULL , Kernel->getLocalSize (),
163- MaxBlockDim[0 ]));
163+ static_cast < int >( MaxBlockDim[0 ]) ));
164164
165165 roundToHighestFactorOfGlobalSizeIn3d (ThreadsPerBlock, GlobalSizeNormalized,
166166 MaxBlockDim, MaxBlockSize);
@@ -208,7 +208,7 @@ setKernelParams([[maybe_unused]] const ur_context_handle_t Context,
208208 MaxWorkGroupSize = Device->getMaxWorkGroupSize ();
209209
210210 if (ProvidedLocalWorkGroupSize) {
211- auto IsValid = [&](int Dim) {
211+ auto IsValid = [&](size_t Dim) {
212212 if (ReqdThreadsPerBlock[Dim] != 0 &&
213213 LocalWorkSize[Dim] != ReqdThreadsPerBlock[Dim])
214214 return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
@@ -217,7 +217,8 @@ setKernelParams([[maybe_unused]] const ur_context_handle_t Context,
217217 LocalWorkSize[Dim] > MaxThreadsPerBlock[Dim])
218218 return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
219219
220- if (LocalWorkSize[Dim] > Device->getMaxWorkItemSizes (Dim))
220+ if (LocalWorkSize[Dim] >
221+ Device->getMaxWorkItemSizes (static_cast <int >(Dim)))
221222 return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
222223 // Checks that local work sizes are a divisor of the global work sizes
223224 // which includes that the local work sizes are neither larger than
@@ -489,9 +490,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
489490
490491 auto &ArgIndices = hKernel->getArgIndices ();
491492 UR_CHECK_ERROR (cuLaunchKernel (
492- CuFunc, BlocksPerGrid[0 ], BlocksPerGrid[1 ], BlocksPerGrid[2 ],
493- ThreadsPerBlock[0 ], ThreadsPerBlock[1 ], ThreadsPerBlock[2 ], LocalSize,
494- CuStream, const_cast <void **>(ArgIndices.data ()), nullptr ));
493+ CuFunc, static_cast <unsigned int >(BlocksPerGrid[0 ]),
494+ static_cast <unsigned int >(BlocksPerGrid[1 ]),
495+ static_cast <unsigned int >(BlocksPerGrid[2 ]),
496+ static_cast <unsigned int >(ThreadsPerBlock[0 ]),
497+ static_cast <unsigned int >(ThreadsPerBlock[1 ]),
498+ static_cast <unsigned int >(ThreadsPerBlock[2 ]), LocalSize, CuStream,
499+ const_cast <void **>(ArgIndices.data ()), nullptr ));
495500
496501 if (LocalSize != 0 )
497502 hKernel->clearLocalSize ();
@@ -657,12 +662,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp(
657662 auto &ArgIndices = hKernel->getArgIndices ();
658663
659664 CUlaunchConfig launch_config;
660- launch_config.gridDimX = BlocksPerGrid[0 ];
661- launch_config.gridDimY = BlocksPerGrid[1 ];
662- launch_config.gridDimZ = BlocksPerGrid[2 ];
663- launch_config.blockDimX = ThreadsPerBlock[0 ];
664- launch_config.blockDimY = ThreadsPerBlock[1 ];
665- launch_config.blockDimZ = ThreadsPerBlock[2 ];
665+ launch_config.gridDimX = static_cast < unsigned int >( BlocksPerGrid[0 ]) ;
666+ launch_config.gridDimY = static_cast < unsigned int >( BlocksPerGrid[1 ]) ;
667+ launch_config.gridDimZ = static_cast < unsigned int >( BlocksPerGrid[2 ]) ;
668+ launch_config.blockDimX = static_cast < unsigned int >( ThreadsPerBlock[0 ]) ;
669+ launch_config.blockDimY = static_cast < unsigned int >( ThreadsPerBlock[1 ]) ;
670+ launch_config.blockDimZ = static_cast < unsigned int >( ThreadsPerBlock[2 ]) ;
666671
667672 launch_config.sharedMemBytes = LocalSize;
668673 launch_config.hStream = CuStream;
@@ -1075,8 +1080,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill(
10751080 break ;
10761081 }
10771082 default : {
1078- UR_CHECK_ERROR (commonMemSetLargePattern (Stream, patternSize, size,
1079- pPattern, DstDevice));
1083+ UR_CHECK_ERROR (
1084+ commonMemSetLargePattern (Stream, static_cast <uint32_t >(patternSize),
1085+ size, pPattern, DstDevice));
10801086 break ;
10811087 }
10821088 }
@@ -1108,7 +1114,6 @@ static size_t imageElementByteSize(CUDA_ARRAY_DESCRIPTOR ArrayDesc) {
11081114 return 4 ;
11091115 default :
11101116 detail::ur::die (" Invalid image format." );
1111- return 0 ;
11121117 }
11131118}
11141119
@@ -1212,7 +1217,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead(
12121217 CUDA_ARRAY_DESCRIPTOR ArrayDesc;
12131218 UR_CHECK_ERROR (cuArrayGetDescriptor (&ArrayDesc, Array));
12141219
1215- int ElementByteSize = imageElementByteSize (ArrayDesc);
1220+ int ElementByteSize = static_cast < int >( imageElementByteSize (ArrayDesc) );
12161221
12171222 size_t ByteOffsetX = origin.x * ElementByteSize * ArrayDesc.NumChannels ;
12181223 size_t BytesToCopy = ElementByteSize * ArrayDesc.NumChannels * region.width ;
@@ -1285,7 +1290,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite(
12851290 CUDA_ARRAY_DESCRIPTOR ArrayDesc;
12861291 UR_CHECK_ERROR (cuArrayGetDescriptor (&ArrayDesc, Array));
12871292
1288- int ElementByteSize = imageElementByteSize (ArrayDesc);
1293+ int ElementByteSize = static_cast < int >( imageElementByteSize (ArrayDesc) );
12891294
12901295 size_t ByteOffsetX = origin.x * ElementByteSize * ArrayDesc.NumChannels ;
12911296 size_t BytesToCopy = ElementByteSize * ArrayDesc.NumChannels * region.width ;
@@ -1364,7 +1369,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy(
13641369 UR_ASSERT (SrcArrayDesc.NumChannels == DstArrayDesc.NumChannels ,
13651370 UR_RESULT_ERROR_INVALID_MEM_OBJECT);
13661371
1367- int ElementByteSize = imageElementByteSize (SrcArrayDesc);
1372+ int ElementByteSize = static_cast < int >( imageElementByteSize (SrcArrayDesc) );
13681373
13691374 size_t DstByteOffsetX =
13701375 dstOrigin.x * ElementByteSize * SrcArrayDesc.NumChannels ;
@@ -1549,8 +1554,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill(
15491554 CuStream));
15501555 break ;
15511556 default :
1552- commonMemSetLargePattern (CuStream, patternSize, size, pPattern ,
1553- (CUdeviceptr)ptr);
1557+ commonMemSetLargePattern (CuStream, static_cast < uint32_t >( patternSize) ,
1558+ size, pPattern, (CUdeviceptr)ptr);
15541559 break ;
15551560 }
15561561 if (phEvent) {
0 commit comments