@@ -468,3 +468,40 @@ func.func private @no_block_func_declaration() -> ()
468468
469469// CHECK: llvm.func @no_block_external_func()
470470llvm.func @no_block_external_func() attributes {sym_visibility = "private"}  // Body-less external declaration; the visibility string must be exactly "private" (no padding).
471+
472+ // -----
473+
474+ // Check that yielded values aren't incorrectly removed in gpu regions
475+ gpu.module @test_module_3 {
476+   gpu.func @gpu_all_reduce_region() {
477+     %arg0 = arith.constant 1 : i32
478+     %result = gpu.all_reduce %arg0 uniform {  // %result has no uses in this function.
479+     ^bb(%lhs : i32, %rhs : i32):
480+       %xor = arith.xori %lhs, %rhs : i32  // Only use of %xor is the yield on the next line.
481+       "gpu.yield"(%xor) : (i32) -> ()  // Generic op form; the quoted name must be exactly "gpu.yield" to match the expected output.
482+     } : (i32) -> (i32)
483+     gpu.return
484+   }
485+ }
486+
487+ // CHECK-LABEL: func @gpu_all_reduce_region()
488+ // CHECK: %[[yield:.*]] = arith.xori %{{.*}}, %{{.*}} : i32
489+ // CHECK: "gpu.yield"(%[[yield]]) : (i32) -> ()
490+
491+ // -----
492+
493+ // Check that yielded values aren't incorrectly removed in linalg regions
494+ module {
495+   func.func @linalg_red_add(%arg0: tensor<?xf32>, %arg1: tensor<1xf32>) -> tensor<1xf32> {
496+     %0 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (0)>], iterator_types = ["reduction"]} ins(%arg0 : tensor<?xf32>) outs(%arg1 : tensor<1xf32>) {  // Maps are written inline because no #map aliases are defined after the preceding split marker.
497+     ^bb0(%in: f32, %out: f32):
498+       %1 = arith.addf %in, %out : f32  // Only use of %1 is the yield on the next line.
499+       linalg.yield %1 : f32
500+     } -> tensor<1xf32>
501+     return %0 : tensor<1xf32>
502+   }
503+ }
504+
505+ // CHECK-LABEL: func @linalg_red_add
506+ // CHECK: %[[yield:.*]] = arith.addf %{{.*}}, %{{.*}} : f32
507+ // CHECK: linalg.yield %[[yield]] : f32
0 commit comments