@@ -416,7 +416,6 @@ static int reduxInvCleanupMsg (redux_ctx* ctx, int r
416416static size_t reduxInvEstimateParallelism (const redux_ctx * ctx );
417417static int reduxInvRequiresDst (const redux_ctx * ctx );
418418static int reduxInvRequiresDstArg (const redux_ctx * ctx );
419- static int reduxInvKernelRequiresDst (const redux_ctx * ctx );
420419static unsigned reduxInvGetSplitFree (const redux_ctx * ctx );
421420static unsigned reduxInvGetSplitReduce (const redux_ctx * ctx );
422421static axis_desc * reduxInvGetSrcAxis (const redux_ctx * ctx , int i );
@@ -1144,12 +1143,6 @@ static int reduxInvRequiresDst (const redux_ctx* ctx){
11441143static int reduxInvRequiresDstArg (const redux_ctx * ctx ){
11451144 return reduxGenRequiresDstArg (ctx -> gr );
11461145}
1147- static int reduxInvKernelRequiresDst (const redux_ctx * ctx ){
1148- return reduxGenKernelRequiresDst (ctx -> gr );
1149- }
1150- static int reduxInvKernelRequiresDstArg (const redux_ctx * ctx ){
1151- return reduxGenKernelRequiresDstArg (ctx -> gr );
1152- }
11531146static unsigned reduxInvGetSplitFree (const redux_ctx * ctx ){
11541147 if (ctx -> xdSplit && !axisIsReduced (ctx -> xdSplit )){
11551148 return axisGetIntraLen (ctx -> xdSplit );
@@ -1512,26 +1505,26 @@ static void reduxGenIterArgs (GpuReduction* gr,
15121505 for (k = gr -> ndd ;k < gr -> nds && reduxGenRequiresDstArg (gr );k ++ ){
15131506 fn (gr , GA_SIZE , "TX" , "l%dPDim" , k , user );
15141507 }
1515- fn (gr , GA_BUFFER , "const GLOBAL_MEM char*" , "s" , 0 , user );
1508+ fn (gr , GA_BUFFER , "const GLOBAL_MEM char* restrict " , "s" , 0 , user );
15161509 fn (gr , GA_SSIZE , "TX" , "sOff" , 0 , user );
15171510 for (k = 0 ;k < gr -> nds ;k ++ ){
15181511 fn (gr , GA_SIZE , "TX" , "sJ%d" , k , user );
15191512 }
15201513 if (reduxGenRequiresDst (gr )){
1521- fn (gr , GA_BUFFER , "GLOBAL_MEM char*" , "d" , 0 , user );
1514+ fn (gr , GA_BUFFER , "GLOBAL_MEM char* restrict " , "d" , 0 , user );
15221515 fn (gr , GA_SSIZE , "TX" , "dOff" , 0 , user );
15231516 for (k = 0 ;k < gr -> ndd ;k ++ ){
15241517 fn (gr , GA_SIZE , "TX" , "dJ%d" , k , user );
15251518 }
15261519 }
15271520 if (reduxGenRequiresDstArg (gr )){
1528- fn (gr , GA_BUFFER , "GLOBAL_MEM char*" , "a" , 0 , user );
1521+ fn (gr , GA_BUFFER , "GLOBAL_MEM char* restrict " , "a" , 0 , user );
15291522 fn (gr , GA_SSIZE , "TX" , "aOff" , 0 , user );
15301523 for (k = 0 ;k < gr -> ndd ;k ++ ){
15311524 fn (gr , GA_SIZE , "TX" , "aJ%d" , k , user );
15321525 }
15331526 }
1534- fn (gr , GA_BUFFER , "GLOBAL_MEM char*" , "w" , 0 , user );
1527+ fn (gr , GA_BUFFER , "GLOBAL_MEM char* restrict " , "w" , 0 , user );
15351528 if (reduxGenKernelRequiresDst (gr )){
15361529 fn (gr , GA_SSIZE , "TX" , "wdOff" , 0 , user );
15371530 fn (gr , GA_SSIZE , "TX" , "pdOff" , 0 , user );
@@ -1624,9 +1617,9 @@ static void reduxGenSrcAppendMacroDefs (GpuReduction* gr){
16241617 */
16251618
16261619 if (gr -> srcTypeCode == GA_HALF && gr -> accTypeCode == GA_FLOAT ){
1627- srcbAppends (& gr -> srcGen , "#define LOADS(v, p) do{(v) = (TK)load_half((TS*)(p));}while(0)\n" );
1620+ srcbAppends (& gr -> srcGen , "#define LOADS(v, p) do{(v) = (TK)load_half((const TS* restrict )(p));}while(0)\n" );
16281621 }else {
1629- srcbAppends (& gr -> srcGen , "#define LOADS(v, p) do{(v) = (TK)*(TS*)(p);}while(0)\n" );
1622+ srcbAppends (& gr -> srcGen , "#define LOADS(v, p) do{(v) = (TK)*(const TS* restrict )(p);}while(0)\n" );
16301623 }
16311624
16321625
@@ -1737,9 +1730,9 @@ static void reduxGenSrcAppendMacroDefs (GpuReduction* gr){
17371730
17381731 if (reduxGenRequiresDst (gr )){
17391732 if (gr -> dstTypeCode == GA_HALF && gr -> accTypeCode == GA_FLOAT ){
1740- srcbAppends (& gr -> srcGen , "#define STORED(p, v) do{store_half((TD*)(p), (v));}while(0)\n" );
1733+ srcbAppends (& gr -> srcGen , "#define STORED(p, v) do{store_half((TD* restrict )(p), (v));}while(0)\n" );
17411734 }else {
1742- srcbAppends (& gr -> srcGen , "#define STORED(p, v) do{*(TD*)(p) = (v);}while(0)\n" );
1735+ srcbAppends (& gr -> srcGen , "#define STORED(p, v) do{*(TD* restrict )(p) = (v);}while(0)\n" );
17431736 }
17441737 }else {
17451738 srcbAppends (& gr -> srcGen , "#define STORED(p, v) do{}while(0)\n" );
@@ -1753,7 +1746,7 @@ static void reduxGenSrcAppendMacroDefs (GpuReduction* gr){
17531746 */
17541747
17551748 if (reduxGenRequiresDstArg (gr )){
1756- srcbAppends (& gr -> srcGen , "#define STOREA(p, v) do{*(TA*)(p) = (v);}while(0)\n" );
1749+ srcbAppends (& gr -> srcGen , "#define STOREA(p, v) do{*(TA* restrict )(p) = (v);}while(0)\n" );
17571750 }else {
17581751 srcbAppends (& gr -> srcGen , "#define STOREA(p, v) do{}while(0)\n" );
17591752 }
@@ -2085,17 +2078,17 @@ static void reduxGenSrcAppendBlockDecode (GpuReduction* gr){
20852078 srcbAppends (& gr -> srcGen , " \n" );
20862079 if (reduxGenKernelRequiresDst (gr )){
20872080 srcbAppends (& gr -> srcGen ,
2088- " TK* wd = (TK*)(w + wdOff);\n"
2089- " TK* wdL = &wd[0];\n"
2090- " TK* wdR = &wd[GDIM_0*D];\n"
2091- " TK* pd = (TK*)(SHMEM + pdOff);\n" );
2081+ " TK* restrict wd = (TK* restrict )(w + wdOff);\n"
2082+ " TK* restrict wdL = &wd[0];\n"
2083+ " TK* restrict wdR = &wd[GDIM_0*D];\n"
2084+ " TK* restrict pd = (TK* restrict )(SHMEM + pdOff);\n" );
20922085 }
20932086 if (reduxGenKernelRequiresDstArg (gr )){
20942087 srcbAppends (& gr -> srcGen ,
2095- " TA* wa = (TA*)(w + waOff);\n"
2096- " TA* waL = &wa[0];\n"
2097- " TA* waR = &wa[GDIM_0*D];\n"
2098- " TA* pa = (TA*)(SHMEM + paOff);\n" );
2088+ " TA* restrict wa = (TA* restrict )(w + waOff);\n"
2089+ " TA* restrict waL = &wa[0];\n"
2090+ " TA* restrict waR = &wa[GDIM_0*D];\n"
2091+ " TA* restrict pa = (TA* restrict )(SHMEM + paOff);\n" );
20992092 }
21002093 srcbAppends (& gr -> srcGen , " \n" );
21012094}
@@ -2173,12 +2166,12 @@ static void reduxGenSrcAppendThreadDecode (GpuReduction* gr){
21732166 " local_barrier();\n" );
21742167 }
21752168 srcbAppends (& gr -> srcGen , " \n"
2176- " const char* ts = s + sOff;\n" );
2169+ " const char* restrict ts = s + sOff;\n" );
21772170 if (reduxGenRequiresDst (gr )){
2178- srcbAppends (& gr -> srcGen , " char* td = d + dOff;\n" );
2171+ srcbAppends (& gr -> srcGen , " char* restrict td = d + dOff;\n" );
21792172 }
21802173 if (reduxGenRequiresDstArg (gr )){
2181- srcbAppends (& gr -> srcGen , " char* ta = a + aOff;\n" );
2174+ srcbAppends (& gr -> srcGen , " char* restrict ta = a + aOff;\n" );
21822175 }
21832176 srcbAppends (& gr -> srcGen , " \n"
21842177 " \n" );
0 commit comments