21
21
22
22
#include <string.h>
23
23
#include <stdio.h>
24
+ #include <stdlib.h>
24
25
#include <assert.h>
25
26
26
27
#include <clBLAS.h>
@@ -1219,10 +1220,11 @@ genUpdateGenericDiagTile(
1219
1220
// type of the vectorized coordinates
1220
1221
Kstring vctype ;
1221
1222
Kstring constOffs , constShifts , constMasks ;
1222
- unsigned int i , j , nops ;
1223
+ unsigned int i , j , nops , size ;
1223
1224
unsigned int maxFetches = 0 ;
1224
1225
const char * yname , * xname ;
1225
1226
const char * ldcName ;
1227
+ char hexadec [1 ];
1226
1228
1227
1229
batch = createStmtBatch ();
1228
1230
if (batch == NULL ) {
@@ -1253,6 +1255,14 @@ genUpdateGenericDiagTile(
1253
1255
tifl = (isUpper ) ? TILE_ITER_BACKWARD_ROWS :
1254
1256
TILE_ITER_BACKWARD_COLS ;
1255
1257
iterInit (& iter , & tileTempC , 1 , tifl );
1258
+ nops = 0 ;
1259
+ while (!iterIsEnd (& iter )) {
1260
+ nops ++ ;
1261
+ size = nops / nrCols ;
1262
+ iterIterate (& iter );
1263
+ }
1264
+
1265
+ iterInit (& iter , & tileTempC , 1 , tifl );
1256
1266
1257
1267
initTmpResTile (& tileTempC , gset , true);
1258
1268
@@ -1316,7 +1326,7 @@ genUpdateGenericDiagTile(
1316
1326
maxFetches = umin (maxFetches , i );
1317
1327
1318
1328
// declare vectorized coordinates
1319
- declareDiagUpresIndexedVars (ctx , vctype .buf , "cc" , tempRows );
1329
+ declareDiagUpresIndexedVars (ctx , vctype .buf , "cc" , size );
1320
1330
1321
1331
/*
1322
1332
* real y coordinate, offset mask and
@@ -1326,8 +1336,8 @@ genUpdateGenericDiagTile(
1326
1336
"unsigned int mask;\n"
1327
1337
"int hit;\n" );
1328
1338
if (withBeta ) {
1329
- declareDiagUpresIndexedVars (ctx , typeName , "alphaNew" , tempRows );
1330
- declareDiagUpresIndexedVars (ctx , typeName , "betaNew" , tempRows );
1339
+ declareDiagUpresIndexedVars (ctx , typeName , "alphaNew" , size );
1340
+ declareDiagUpresIndexedVars (ctx , typeName , "betaNew" , size );
1331
1341
}
1332
1342
1333
1343
// declare tile
@@ -1443,7 +1453,8 @@ genUpdateGenericDiagTile(
1443
1453
ksprintf (& kstr , "cc%u" , i );
1444
1454
}
1445
1455
else {
1446
- ksprintf (& kstr , "cc%u.s%u" , i , iter .col );
1456
+ itoa (iter .col , hexadec , 16 );
1457
+ ksprintf (& kstr , "cc%u.s%s" , i , hexadec );
1447
1458
}
1448
1459
1449
1460
// prepare multipliers and fetch
0 commit comments