@@ -270,58 +270,40 @@ func.func @thread_partial_execution() {
270270 %3 = arith.addi %arg , %0 : index
271271 affine.yield %3 : index
272272 }
273- // UNROLL-FULL: %{{.*}} = affine.for %{{.*}} = %{{.*}} to 3 step 2 iter_args(%[[ARG:.*]] = %[[C0]]) -> (index) {
274- // UNROLL-FULL: %[[SUM:.*]] = arith.addi %[[ARG]], %[[C0]] : index
275- // UNROLL-FULL: affine.yield %[[SUM]] : index
276- // UNROLL-FULL: }
273+ // UNROLL-FULL: affine.for %{{.*}} = %{{.*}} to 3 step 2 iter_args(%[[ARG:.*]] = %[[C0]]) -> (index) {
274+ // UNROLL-FULL-NEXT: %[[SUM:.*]] = arith.addi %[[ARG]], %[[C0]] : index
275+ // UNROLL-FULL-NEXT: affine.yield %[[SUM]] : index
276+ // UNROLL-FULL-NEXT: }
277277 gpu.terminator
278278 }
279279 return
280280}
281281
282- // UNROLL-FULL-LABEL: func @invalid_loop
283- func.func @invalid_loop () {
284- %0 = arith.constant 0 :index
285- %1 = arith.constant 2 : index
286- gpu.launch blocks (%bx , %by , %bz ) in (%sz_bx = %1 , %sz_by = %1 , %sz_bz = %1 )
287- threads (%tx , %ty , %tz ) in (%sz_tx = %1 , %sz_ty = %1 , %sz_tz = %1 ) {
288- %threadid = gpu.thread_id x
289- affine.for %iv = %tx to 0 step 2 iter_args (%arg = %0 ) -> index {
290- %3 = arith.addi %arg , %0 : index
291- affine.yield %3 : index
292- }
293- gpu.terminator
294- // UNROLL-FULL-CHECK: %{{.*}} = gpu.thread_id x
295- // UNROLL-FULL-CHECK: gpu.terminator
296- }
297- return
298- }
299-
300282// UNROLL-FULL-LABEL: func @unroll_all_thread
301283func.func @unroll_all_thread () {
302284 %0 = arith.constant 0 :index
303285 %1 = arith.constant 2 : index
304- // UNROLL-FULL-CHECK : %[[C0:.*]] = arith.constant 0 : index
286+ // UNROLL-FULL: %[[C0:.*]] = arith.constant 0 : index
305287 gpu.launch blocks (%bx , %by , %bz ) in (%sz_bx = %1 , %sz_by = %1 , %sz_bz = %1 )
306288 threads (%tx , %ty , %tz ) in (%sz_tx = %1 , %sz_ty = %1 , %sz_tz = %1 ) {
307289 %threadid = gpu.thread_id x
308290 %4 = affine.for %iv = %threadid to 6 step 2 iter_args (%arg = %0 ) -> index {
309291 %3 = arith.addi %arg , %0 : index
310292 affine.yield %3 : index
311293 }
312- // UNROLL-FULL-CHECK : %[[SUM_0:.*]] = arith.addi %[[C0]], %[[C0]] : index
313- // UNROLL-FULL-CHECK : %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
314- // UNROLL-FULL-CHECK : %[[SUM_2:.*]] = arith.addi %[[SUM_1]], %[[C0]] : index
294+ // UNROLL-FULL: %[[SUM_0:.*]] = arith.addi %[[C0]], %[[C0]] : index
295+ // UNROLL-FULL-NEXT: %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
296+ // UNROLL-FULL-NEXT: %[[SUM_2:.*]] = arith.addi %[[SUM_1]], %[[C0]] : index
315297 gpu.terminator
316298 }
317299 return
318300}
319301
320- // UNROLL-FULL-LABEL: func.func @partial_unroll_factor_4
302+ // UNROLL-FULL-LABEL: func.func @partial_unroll_factor_4
321303func.func @partial_unroll_factor_4 () {
322304 %0 = arith.constant 0 :index
323305 %1 = arith.constant 2 : index
324- // UNROLL-FULL: %[[C0:.*]] = arith.constant 0 : index
306+ // UNROLL-FULL: %[[C0:.*]] = arith.constant 0 : index
325307 gpu.launch blocks (%bx , %by , %bz ) in (%sz_bx = %1 , %sz_by = %1 , %sz_bz = %1 )
326308 threads (%tx , %ty , %tz ) in (%sz_tx = %1 , %sz_ty = %1 , %sz_tz = %1 ) {
327309 %threadid = gpu.thread_id x
@@ -332,13 +314,13 @@ func.func @partial_unroll_factor_4() {
332314 gpu.terminator
333315 }
334316 // UNROLL-FULL: %[[ID:.*]] = gpu.thread_id x
335- // UNROLL-FULL: affine.for %{{.*}} = %[[ID]] to 9 step 8 iter_args(%[[ARG:.*]] = %[[C0]]) -> (index) {
336- // UNROLL-FULL: %[[SUM_0:.*]] = arith.addi %[[ARG]], %[[C0]] : index
337- // UNROLL-FULL: %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
338- // UNROLL-FULL: %[[SUM_2:.*]] = arith.addi %[[SUM_1]], %[[C0]] : index
339- // UNROLL-FULL: %[[SUM_3:.*]] = arith.addi %[[SUM_2]], %[[C0]] : index
340- // UNROLL-FULL: affine.yield %[[SUM_3]] : index
341- // UNROLL-FULL: }
317+ // UNROLL-FULL-NEXT: affine.for %{{.*}} = %[[ID]] to 9 step 8 iter_args(%[[ARG:.*]] = %[[C0]]) -> (index) {
318+ // UNROLL-FULL-NEXT: %[[SUM_0:.*]] = arith.addi %[[ARG]], %[[C0]] : index
319+ // UNROLL-FULL-NEXT: %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
320+ // UNROLL-FULL-NEXT: %[[SUM_2:.*]] = arith.addi %[[SUM_1]], %[[C0]] : index
321+ // UNROLL-FULL-NEXT: %[[SUM_3:.*]] = arith.addi %[[SUM_2]], %[[C0]] : index
322+ // UNROLL-FULL-NEXT: affine.yield %[[SUM_3]] : index
323+ // UNROLL-FULL-NEXT: }
342324 return
343325}
344326
@@ -800,14 +782,14 @@ func.func @gpu_launch_unroll_by_factor_4() {
800782 gpu.terminator
801783 }
802784 // UNROLL-BY-4: %[[ID:.*]] = gpu.thread_id x
803- // UNROLL-BY-4: %[[SUM_0:.*]] = arith.addi %[[C0]], %[[C0]] : index
804- // UNROLL-BY-4: %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
805- // UNROLL-BY-4: %[[SUM_2:.*]] = arith.addi %[[SUM_1]], %[[C0]] : index
806- // UNROLL-BY-4: %[[SUM_3:.*]] = arith.addi %[[SUM_2]], %[[C0]] : index
807- // UNROLL-BY-4: affine.for %[[VAL_20:.*]] = [[$MAP7]](){{\[}}%[[ID]]] to 11 step 2 iter_args(%[[ARG:.*]] = %[[SUM_3]]) -> (index) {
808- // UNROLL-BY-4: %[[SUM_4:.*]] = arith.addi %[[ARG]], %[[C0]] : index
809- // UNROLL-BY-4: affine.yield %[[SUM_4]] : index
810- // UNROLL-BY-4: }
785+ // UNROLL-BY-4-NEXT: %[[SUM_0:.*]] = arith.addi %[[C0]], %[[C0]] : index
786+ // UNROLL-BY-4-NEXT: %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
787+ // UNROLL-BY-4-NEXT: %[[SUM_2:.*]] = arith.addi %[[SUM_1]], %[[C0]] : index
788+ // UNROLL-BY-4-NEXT: %[[SUM_3:.*]] = arith.addi %[[SUM_2]], %[[C0]] : index
789+ // UNROLL-BY-4-NEXT: affine.for %[[VAL_20:.*]] = [[$MAP7]](){{\[}}%[[ID]]] to 11 step 2 iter_args(%[[ARG:.*]] = %[[SUM_3]]) -> (index) {
790+ // UNROLL-BY-4-NEXT: %[[SUM_4:.*]] = arith.addi %[[ARG]], %[[C0]] : index
791+ // UNROLL-BY-4-NEXT: affine.yield %[[SUM_4]] : index
792+ // UNROLL-BY-4-NEXT: }
811793 return
812794}
813795
0 commit comments