@@ -484,10 +484,10 @@ tt.func @matmul_tma_acc_with_conditional_user(
484484 scf.if %do_epilogue {
485485 // CHECK-NEXT: ttng.wait_barrier [[CUR_ACC_READY_BAR]], [[ACC_PHASE]] {ttg.partition = 0 : i32}
486486 // CHECK-NEXT: [[C:%.*]] = ttng.tmem_load [[ACC_BUF]] {ttg.partition = 0 : i32}
487- // CHECK-NEXT: [[NEXT_ACC_EMPTY_BAR:%.*]] = ttg.memdesc_subview [[ACC_EMPTY_BUFS]][[[NEXT_ACC_INDEX]]]
488- // CHECK-NEXT: ttng.arrive_barrier [[NEXT_ACC_EMPTY_BAR]], 1 {ttg.partition = 0 : i32}
489487 // CHECK-NEXT: "acc_user"([[C]])
490488 " acc_user" (%c ) : (tensor <128 x128 xf32 , #acc_layout >) -> ()
489+ // CHECK-NEXT: [[NEXT_ACC_EMPTY_BAR:%.*]] = ttg.memdesc_subview [[ACC_EMPTY_BUFS]][[[NEXT_ACC_INDEX]]]
490+ // CHECK-NEXT: ttng.arrive_barrier [[NEXT_ACC_EMPTY_BAR]], 1 {ttg.partition = 0 : i32}
491491 // CHECK-NEXT: } {ttg.partition = 0 : i32}
492492 }
493493
@@ -513,7 +513,7 @@ tt.func @matmul_tma_acc_with_conditional_user(
513513
514514// AWS: ttg.warp_specialize
515515// AWS: num_warps(4)
516- // AWS: num_warps(1)
516+ // AWS: num_warps(2)
517517// AWS: num_warps(1)
518518
519519// CHECK: @matmul_tma_acc_with_conditional_def
@@ -612,7 +612,7 @@ tt.func @matmul_tma_acc_with_conditional_def(
612612
613613// AWS: ttg.warp_specialize
614614// AWS: num_warps(4)
615- // AWS: num_warps(1)
615+ // AWS: num_warps(2)
616616// AWS: num_warps(1)
617617
618618// CHECK: @matmul_tma_acc_with_conditional_def_and_use
@@ -682,10 +682,10 @@ tt.func @matmul_tma_acc_with_conditional_def_and_use(
682682 scf.if %do_epilogue {
683683 // CHECK-NEXT: ttng.wait_barrier [[CUR_ACC_READY_BAR]], [[ACC_PHASE]] {ttg.partition = 0 : i32}
684684 // CHECK-NEXT: [[C:%.*]] = ttng.tmem_load [[ACC_BUF]] {ttg.partition = 0 : i32}
685- // CHECK-NEXT: [[NEXT_ACC_EMPTY_BAR:%.*]] = ttg.memdesc_subview [[ACC_EMPTY_BUFS]][[[NEXT_ACC_INDEX]]]
686- // CHECK-NEXT: ttng.arrive_barrier [[NEXT_ACC_EMPTY_BAR]], 1 {ttg.partition = 0 : i32}
687685 // CHECK-NEXT: "acc_user"([[C]])
688686 " acc_user" (%c ) : (tensor <128 x128 xf32 , #acc_layout >) -> ()
687+ // CHECK-NEXT: [[NEXT_ACC_EMPTY_BAR:%.*]] = ttg.memdesc_subview [[ACC_EMPTY_BUFS]][[[NEXT_ACC_INDEX]]]
688+ // CHECK-NEXT: ttng.arrive_barrier [[NEXT_ACC_EMPTY_BAR]], 1 {ttg.partition = 0 : i32}
689689 // CHECK-NEXT: } {ttg.partition = 0 : i32}
690690 }
691691
@@ -714,7 +714,7 @@ tt.func @matmul_tma_acc_with_conditional_def_and_use(
714714
715715// AWS: ttg.warp_specialize
716716// AWS: num_warps(1)
717- // AWS: num_warps(1)
717+ // AWS: num_warps(2)
718718// AWS: num_warps(1)
719719
720720// CHECK: @matmul_tma_acc_with_conditional_def_and_use_no_multibuf
@@ -791,10 +791,12 @@ tt.func @matmul_tma_acc_with_conditional_def_and_use_no_multibuf_flag(
791791 // CHECK-NEXT: scf.if [[DO_EPILOGUE]]
792792 scf.if %do_epilogue {
793793 // CHECK-NEXT: ttng.wait_barrier [[ACC_READY_BUF0]], [[ACC_PHASE]] {ttg.partition = 0 : i32}
794+ // CHECK-NEXT: "some_op"()
795+ " some_op" () : () -> ()
794796 // CHECK-NEXT: [[C:%.*]] = ttng.tmem_load [[ACC_BUF]] {ttg.partition = 0 : i32}
795- // CHECK-NEXT: ttng.arrive_barrier [[ACC_EMPTY_BUF0]], 1 {ttg.partition = 0 : i32}
796797 // CHECK-NEXT: "acc_user"([[C]])
797798 " acc_user" (%c ) : (tensor <128 x128 xf32 , #acc_layout >) -> ()
799+ // CHECK-NEXT: ttng.arrive_barrier [[ACC_EMPTY_BUF0]], 1 {ttg.partition = 0 : i32}
798800 // CHECK-NEXT: } {ttg.partition = 0 : i32}
799801 }
800802
0 commit comments