Skip to content

Commit a50aaef

Browse files
committed
Merge pull request #115 from hughperkins/develop
Fix kernel crash on NVIDIA, caused by float4 alignment error
2 parents 57e25b9 + dff63b4 commit a50aaef

File tree

1 file changed

+4
-2
lines changed
  • src/library/blas/gens/clTemplates

1 file changed

+4
-2
lines changed

src/library/blas/gens/clTemplates/ger.cl

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,8 @@ __kernel void %PREFIXger_C_kernel( __global %TYPE const* restrict _X, __global %
5252
}
5353

5454
// create local memory
55-
__local %TYPE localX[ BH * %V ];
55+
__local %TYPE%V localXV[ BH ];
56+
__local %TYPE *localX = (__local %TYPE *)localXV;
5657
__local %TYPE localY[ BW ];
5758

5859
uint lID = get_local_id( 0 );
@@ -193,7 +194,8 @@ __kernel void %PREFIXger_R_kernel( __global %TYPE const* restrict _X, __global %
193194
}
194195

195196
__local %TYPE localX[ BH ];
196-
__local %TYPE localY[ BW * %V ];
197+
__local %TYPE%V localYV[ BW ];
198+
__local %TYPE *localY = (__local %TYPE *)localYV;
197199

198200
uint lID = get_local_id( 0 );
199201
uint gID = get_group_id( 0 );

0 commit comments

Comments
 (0)