@@ -160,7 +160,7 @@ void guessLocalWorkSize(ur_device_handle_t Device, size_t *ThreadsPerBlock,
160160 int MinGrid, MaxBlockSize;
161161 UR_CHECK_ERROR (cuOccupancyMaxPotentialBlockSize (
162162 &MinGrid, &MaxBlockSize, Kernel->get (), NULL , Kernel->getLocalSize (),
163- static_cast < int >( MaxBlockDim[0 ]) ));
163+ MaxBlockDim[0 ]));
164164
165165 roundToHighestFactorOfGlobalSizeIn3d (ThreadsPerBlock, GlobalSizeNormalized,
166166 MaxBlockDim, MaxBlockSize);
@@ -208,7 +208,7 @@ setKernelParams([[maybe_unused]] const ur_context_handle_t Context,
208208 MaxWorkGroupSize = Device->getMaxWorkGroupSize ();
209209
210210 if (ProvidedLocalWorkGroupSize) {
211- auto IsValid = [&](size_t Dim) {
211+ auto IsValid = [&](int Dim) {
212212 if (ReqdThreadsPerBlock[Dim] != 0 &&
213213 LocalWorkSize[Dim] != ReqdThreadsPerBlock[Dim])
214214 return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
@@ -217,8 +217,7 @@ setKernelParams([[maybe_unused]] const ur_context_handle_t Context,
217217 LocalWorkSize[Dim] > MaxThreadsPerBlock[Dim])
218218 return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
219219
220- if (LocalWorkSize[Dim] >
221- Device->getMaxWorkItemSizes (static_cast <int >(Dim)))
220+ if (LocalWorkSize[Dim] > Device->getMaxWorkItemSizes (Dim))
222221 return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
223222 // Checks that local work sizes are a divisor of the global work sizes
224223 // which includes that the local work sizes are neither larger than
@@ -482,13 +481,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
482481
483482 auto &ArgIndices = hKernel->getArgIndices ();
484483 UR_CHECK_ERROR (cuLaunchKernel (
485- CuFunc, static_cast <unsigned int >(BlocksPerGrid[0 ]),
486- static_cast <unsigned int >(BlocksPerGrid[1 ]),
487- static_cast <unsigned int >(BlocksPerGrid[2 ]),
488- static_cast <unsigned int >(ThreadsPerBlock[0 ]),
489- static_cast <unsigned int >(ThreadsPerBlock[1 ]),
490- static_cast <unsigned int >(ThreadsPerBlock[2 ]), LocalSize, CuStream,
491- const_cast <void **>(ArgIndices.data ()), nullptr ));
484+ CuFunc, BlocksPerGrid[0 ], BlocksPerGrid[1 ], BlocksPerGrid[2 ],
485+ ThreadsPerBlock[0 ], ThreadsPerBlock[1 ], ThreadsPerBlock[2 ], LocalSize,
486+ CuStream, const_cast <void **>(ArgIndices.data ()), nullptr ));
492487
493488 if (LocalSize != 0 )
494489 hKernel->clearLocalSize ();
@@ -654,12 +649,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp(
654649 auto &ArgIndices = hKernel->getArgIndices ();
655650
656651 CUlaunchConfig launch_config;
657- launch_config.gridDimX = static_cast < unsigned int >( BlocksPerGrid[0 ]) ;
658- launch_config.gridDimY = static_cast < unsigned int >( BlocksPerGrid[1 ]) ;
659- launch_config.gridDimZ = static_cast < unsigned int >( BlocksPerGrid[2 ]) ;
660- launch_config.blockDimX = static_cast < unsigned int >( ThreadsPerBlock[0 ]) ;
661- launch_config.blockDimY = static_cast < unsigned int >( ThreadsPerBlock[1 ]) ;
662- launch_config.blockDimZ = static_cast < unsigned int >( ThreadsPerBlock[2 ]) ;
652+ launch_config.gridDimX = BlocksPerGrid[0 ];
653+ launch_config.gridDimY = BlocksPerGrid[1 ];
654+ launch_config.gridDimZ = BlocksPerGrid[2 ];
655+ launch_config.blockDimX = ThreadsPerBlock[0 ];
656+ launch_config.blockDimY = ThreadsPerBlock[1 ];
657+ launch_config.blockDimZ = ThreadsPerBlock[2 ];
663658
664659 launch_config.sharedMemBytes = LocalSize;
665660 launch_config.hStream = CuStream;
@@ -984,9 +979,8 @@ ur_result_t commonMemSetLargePattern(CUstream Stream, uint32_t PatternSize,
984979 auto OffsetPtr = Ptr + (step * sizeof (uint8_t ));
985980
986981 // set all of the pattern chunks
987- UR_CHECK_ERROR (cuMemsetD2D8Async (OffsetPtr, Pitch,
988- static_cast <unsigned char >(Value),
989- sizeof (uint8_t ), Height, Stream));
982+ UR_CHECK_ERROR (cuMemsetD2D8Async (OffsetPtr, Pitch, Value, sizeof (uint8_t ),
983+ Height, Stream));
990984 }
991985 return UR_RESULT_SUCCESS;
992986}
@@ -1037,9 +1031,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill(
10371031 break ;
10381032 }
10391033 default : {
1040- UR_CHECK_ERROR (
1041- commonMemSetLargePattern (Stream, static_cast <uint32_t >(patternSize),
1042- size, pPattern, DstDevice));
1034+ UR_CHECK_ERROR (commonMemSetLargePattern (Stream, patternSize, size,
1035+ pPattern, DstDevice));
10431036 break ;
10441037 }
10451038 }
@@ -1071,6 +1064,7 @@ static size_t imageElementByteSize(CUDA_ARRAY_DESCRIPTOR ArrayDesc) {
10711064 return 4 ;
10721065 default :
10731066 detail::ur::die (" Invalid image format." );
1067+ return 0 ;
10741068 }
10751069}
10761070
@@ -1174,7 +1168,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead(
11741168 CUDA_ARRAY_DESCRIPTOR ArrayDesc;
11751169 UR_CHECK_ERROR (cuArrayGetDescriptor (&ArrayDesc, Array));
11761170
1177- int ElementByteSize = static_cast < int >( imageElementByteSize (ArrayDesc) );
1171+ int ElementByteSize = imageElementByteSize (ArrayDesc);
11781172
11791173 size_t ByteOffsetX = origin.x * ElementByteSize * ArrayDesc.NumChannels ;
11801174 size_t BytesToCopy = ElementByteSize * ArrayDesc.NumChannels * region.width ;
@@ -1247,7 +1241,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite(
12471241 CUDA_ARRAY_DESCRIPTOR ArrayDesc;
12481242 UR_CHECK_ERROR (cuArrayGetDescriptor (&ArrayDesc, Array));
12491243
1250- int ElementByteSize = static_cast < int >( imageElementByteSize (ArrayDesc) );
1244+ int ElementByteSize = imageElementByteSize (ArrayDesc);
12511245
12521246 size_t ByteOffsetX = origin.x * ElementByteSize * ArrayDesc.NumChannels ;
12531247 size_t BytesToCopy = ElementByteSize * ArrayDesc.NumChannels * region.width ;
@@ -1326,7 +1320,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy(
13261320 UR_ASSERT (SrcArrayDesc.NumChannels == DstArrayDesc.NumChannels ,
13271321 UR_RESULT_ERROR_INVALID_MEM_OBJECT);
13281322
1329- int ElementByteSize = static_cast < int >( imageElementByteSize (SrcArrayDesc) );
1323+ int ElementByteSize = imageElementByteSize (SrcArrayDesc);
13301324
13311325 size_t DstByteOffsetX =
13321326 dstOrigin.x * ElementByteSize * SrcArrayDesc.NumChannels ;
@@ -1511,8 +1505,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill(
15111505 CuStream));
15121506 break ;
15131507 default :
1514- commonMemSetLargePattern (CuStream, static_cast < uint32_t >( patternSize) ,
1515- size, pPattern, (CUdeviceptr)ptr);
1508+ commonMemSetLargePattern (CuStream, patternSize, size, pPattern ,
1509+ (CUdeviceptr)ptr);
15161510 break ;
15171511 }
15181512 if (phEvent) {
0 commit comments