Skip to content

Commit 5104576

Browse files
authored
[MIOpen] Bugfix Use uint64_t instead of long for offsets (#3381)
## Motivation When porting OpenCL kernels the `ulong` datatype was often ported to `unsigned long`. This is is problematic since the size of long on windows is 32 bits. This MR replaces the uses of long with `uint64_t`. refs: #3364 ## Test Plan The tests should be extended in a follow up MR to cover these cases as well. ## Submission Checklist - [ ] Look over the contributing guidelines at https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests.
1 parent 50d762b commit 5104576

File tree

8 files changed

+147
-144
lines changed

8 files changed

+147
-144
lines changed

projects/miopen/src/kernels/MIOpenCol2Im3d.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ extern "C" __global__ void Col2Im3dU(FLOAT* col,
5959
const unsigned int height,
6060
const unsigned int width,
6161
FLOAT* im,
62-
const unsigned long im_offset)
62+
const uint64_t im_offset)
6363
{
6464
FLOAT* im_off = im + im_offset;
6565
unsigned int gid = blockIdx.x * blockDim.x + threadIdx.x;

projects/miopen/src/kernels/MIOpenConvFFT.cpp

Lines changed: 102 additions & 101 deletions
Large diffs are not rendered by default.

projects/miopen/src/kernels/MIOpenDropoutHIP.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030

3131
// Workaround to overcome redefinition errors while including rocrand header files directly
3232
#include "miopen_rocrand.hpp"
33+
#include "miopen_cstdint.hpp"
3334

3435
#ifndef MIOPEN_USE_FP32
3536
#define MIOPEN_USE_FP32 0
@@ -63,7 +64,7 @@
6364
* @param states_num The number of elements in the state array.
6465
*/
6566
extern "C" __global__ void
66-
InitKernelStateHIP(rocrand_state_xorwow* state, ulong prng_seed, ulong states_num)
67+
InitKernelStateHIP(rocrand_state_xorwow* state, uint64_t prng_seed, uint64_t states_num)
6768
{
6869
// Get the index of the current element
6970
size_t index = blockIdx.x * blockDim.x + threadIdx.x;

projects/miopen/src/kernels/MIOpenMultiMarginLoss.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ __device__ void multimarginlossforward2d(const DTYPE* __restrict__ I,
3737
const uint64_t* __restrict__ T,
3838
const DTYPE* __restrict__ W,
3939
void* __restrict__ O,
40-
const long p,
40+
const int64_t p,
4141
const float margin,
4242
tensor_view_t<2> I_tv,
4343
tensor_view_t<1> T_tv,
@@ -88,7 +88,7 @@ extern "C" __global__ void MultiMarginLossForward2d(const FLOAT* __restrict__ I,
8888
const uint64_t* __restrict__ T,
8989
const FLOAT* __restrict__ W,
9090
void* __restrict__ O,
91-
const long p,
91+
const int64_t p,
9292
const float margin,
9393
tensor_view_t<2> I_tv,
9494
tensor_view_t<1> T_tv,

projects/miopen/src/kernels/MIOpenNeuron.cpp

Lines changed: 27 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#endif
3333

3434
#include "activation_functions.hpp"
35+
#include "miopen_cstdint.hpp"
3536

3637
#ifdef LITE
3738

@@ -55,8 +56,8 @@ extern "C" __global__ void MIOpenActiveFwdLite(const FP_TYPE* bot,
5556
FP_TYPE gamma,
5657
FP_TYPE beta,
5758
FP_TYPE alpha,
58-
const long bot_offset,
59-
const long top_offset)
59+
const int64_t bot_offset,
60+
const int64_t top_offset)
6061
{
6162
const unsigned int tid = blockIdx.x * LOCAL_SIZE + threadIdx.x;
6263
const unsigned int index = tid * MIOPEN_READ_UNIT;
@@ -84,8 +85,8 @@ extern "C" __global__ void MIOpenActiveFwd2DLite(const FP_TYPE* bot,
8485
FP_TYPE gamma,
8586
FP_TYPE beta,
8687
FP_TYPE alpha,
87-
const long bot_offset,
88-
const long top_offset,
88+
const int64_t bot_offset,
89+
const int64_t top_offset,
8990
const uint bot_stride,
9091
const uint top_stride)
9192
{
@@ -123,10 +124,10 @@ extern "C" __global__ void MIOpenActiveBwdLite(FP_TYPE* bot_diff,
123124
FP_TYPE gamma,
124125
FP_TYPE beta,
125126
FP_TYPE alpha,
126-
const long bot_diff_offset,
127-
const long top_diff_offset,
128-
const long bot_offset,
129-
const long top_offset)
127+
const int64_t bot_diff_offset,
128+
const int64_t top_diff_offset,
129+
const int64_t bot_offset,
130+
const int64_t top_offset)
130131
{
131132
const unsigned int tid = blockIdx.x * LOCAL_SIZE + threadIdx.x;
132133
int index = tid * MIOPEN_READ_UNIT;
@@ -163,14 +164,14 @@ extern "C" __global__ void MIOpenActiveBwd2DLite(FP_TYPE* bot_diff,
163164
FP_TYPE gamma,
164165
FP_TYPE beta,
165166
FP_TYPE alpha,
166-
const long bot_diff_offset,
167-
const long top_diff_offset,
168-
const long bot_offset,
169-
const long top_offset,
170-
const uint bot_diff_stride,
171-
const uint top_diff_stride,
172-
const uint bot_stride,
173-
const uint top_stride)
167+
const int64_t bot_diff_offset,
168+
const int64_t top_diff_offset,
169+
const int64_t bot_offset,
170+
const int64_t top_offset,
171+
const uint32_t bot_diff_stride,
172+
const uint32_t top_diff_stride,
173+
const uint32_t bot_stride,
174+
const uint32_t top_stride)
174175
{
175176
const unsigned int x_id = blockIdx.x * LOCAL_SIZE + threadIdx.x;
176177
const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -181,10 +182,10 @@ extern "C" __global__ void MIOpenActiveBwd2DLite(FP_TYPE* bot_diff,
181182
if(y >= height)
182183
return;
183184

184-
uint bot_diff_index = y * bot_diff_stride + x_id * MIOPEN_READ_UNIT;
185-
uint top_diff_index = y * top_diff_stride + x_id * MIOPEN_READ_UNIT;
186-
uint bot_index = y * bot_stride + x_id * MIOPEN_READ_UNIT;
187-
uint top_index = y * top_stride + x_id * MIOPEN_READ_UNIT;
185+
uint32_t bot_diff_index = y * bot_diff_stride + x_id * MIOPEN_READ_UNIT;
186+
uint32_t top_diff_index = y * top_diff_stride + x_id * MIOPEN_READ_UNIT;
187+
uint32_t bot_index = y * bot_stride + x_id * MIOPEN_READ_UNIT;
188+
uint32_t top_index = y * top_stride + x_id * MIOPEN_READ_UNIT;
188189

189190
FP_TYPE bot_diff_dat[MIOPEN_READ_UNIT];
190191
FP_TYPE top_diff_dat[MIOPEN_READ_UNIT];
@@ -215,8 +216,8 @@ __launch_bounds__(
215216
FP_TYPE gamma,
216217
FP_TYPE beta,
217218
FP_TYPE alpha,
218-
const long xOffset,
219-
const long yOffset)
219+
const int64_t xOffset,
220+
const int64_t yOffset)
220221
{
221222
const unsigned int x = blockIdx.x * MIOPEN_NRN_GROUP_SZ0 + threadIdx.x; // channel x
222223

@@ -339,10 +340,10 @@ __launch_bounds__(
339340
FP_TYPE gamma,
340341
FP_TYPE beta,
341342
FP_TYPE alpha,
342-
const long dxOffset,
343-
const long dyOffset,
344-
const long xOffset,
345-
const long yOffset)
343+
const int64_t dxOffset,
344+
const int64_t dyOffset,
345+
const int64_t xOffset,
346+
const int64_t yOffset)
346347
{
347348
const unsigned int x = blockIdx.x * MIOPEN_NRN_GROUP_SZ0 + threadIdx.x;
348349

projects/miopen/src/kernels/MIOpenPoolingForwardNaive.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
#error "MLO_POOLING_IS2D_KERNEL must be defined"
4747
#endif
4848

49-
using arg_size_t = unsigned long;
49+
using arg_size_t = uint64_t;
5050

5151
extern "C" __global__ void mloPoolingForwardNaive(const FLOAT* bot_ptr,
5252
FLOAT* top_ptr,

projects/miopen/src/kernels/MIOpenTensorKernelsHip.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -166,10 +166,10 @@ extern "C" __global__ void Op2dTensorSquash(const MIOPEN_TYPE* a,
166166
const MIOPEN_TYPE alpha0,
167167
const MIOPEN_TYPE alpha1,
168168
const MIOPEN_TYPE beta,
169-
const long Aoffset,
170-
const long Boffset,
171-
const long Coffset,
172-
const long total_work,
169+
const int64_t Aoffset,
170+
const int64_t Boffset,
171+
const int64_t Coffset,
172+
const int64_t total_work,
173173
const int use_apl0,
174174
const int use_apl1,
175175
const int use_bet)
@@ -448,9 +448,9 @@ extern "C" __global__ void Op4dTensorGeneric(MIOPEN_TYPE* a,
448448
const MIOPEN_TYPE beta,
449449
const unsigned int bitmap,
450450
const int work_per_wg,
451-
const long Aoffset,
452-
const long Boffset,
453-
const long Coffset,
451+
const int64_t Aoffset,
452+
const int64_t Boffset,
453+
const int64_t Coffset,
454454
const int num_wg)
455455
{
456456
int gid = blockIdx.x;
@@ -544,10 +544,10 @@ extern "C" __global__ void Op4dTensorLite(const MIOPEN_TYPE* a,
544544
const MIOPEN_TYPE alpha0,
545545
const MIOPEN_TYPE alpha1,
546546
const MIOPEN_TYPE beta,
547-
const long Aoffset,
548-
const long Boffset,
549-
const long Coffset,
550-
const long total_work,
547+
const int64_t Aoffset,
548+
const int64_t Boffset,
549+
const int64_t Coffset,
550+
const int64_t total_work,
551551
const int use_beta)
552552
{
553553
int gid0 = blockIdx.x * blockDim.x + threadIdx.x;

projects/miopen/src/ocl/utilocl.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -544,7 +544,7 @@ float Col2Im3dGPU(const Handle& handle,
544544
const uint32_t in_h,
545545
const uint32_t in_w,
546546
Data_t im,
547-
std::size_t im_offset,
547+
const uint64_t im_offset,
548548
miopenDataType_t type)
549549
{
550550
std::string program_name = "MIOpenCol2Im3d.cpp";

0 commit comments

Comments
 (0)