@@ -531,6 +531,35 @@ func.func @warp_scf_for_swap_no_yield(%arg0: index) {
531531 return
532532}
533533
534+ // -----
535+ // scf.for result is not distributed in this case.
536+ // CHECK-PROP-LABEL: func @warp_scf_for_broadcasted_result(
537+ // CHECK-PROP: %[[W0:.*]] = gpu.warp_execute_on_lane_0(%{{.*}})[32] -> (vector<1xf32>) {
538+ // CHECK-PROP: %[[INI:.*]] = "some_def"() : () -> vector<1xf32>
539+ // CHECK-PROP: gpu.yield %[[INI]] : vector<1xf32>
540+ // CHECK-PROP: }
541+ // CHECK-PROP: %[[F:.*]] = scf.for {{.*}} iter_args(%[[ARG2:.*]] = %[[W0]]) -> (vector<1xf32>) {
542+ // CHECK-PROP: %[[W1:.*]] = gpu.warp_execute_on_lane_0(%{{.*}})[32] args(%[[ARG2]] : vector<1xf32>) -> (vector<1xf32>) {
543+ // CHECK-PROP: ^bb0(%{{.*}}: vector<1xf32>):
544+ // CHECK-PROP: %[[T0:.*]] = "some_op"(%{{.*}}) : (vector<1xf32>) -> vector<1xf32>
545+ // CHECK-PROP: gpu.yield %[[T0]] : vector<1xf32>
546+ // CHECK-PROP: }
547+ // CHECK-PROP: scf.yield %[[W1]] : vector<1xf32>
// Test input: an scf.for nested inside gpu.warp_execute_on_lane_0 whose
// loop-carried value is vector<1xf32>, the same type the warp op returns.
// The CHECK-PROP lines above expect the loop to end up outside the warp op,
// with its init value and its body each wrapped in their own warp op and the
// vector<1xf32> type left unchanged (i.e. not distributed across lanes).
//
// %arg0 is the index operand passed to gpu.warp_execute_on_lane_0; 32 is the
// bracketed warp-size operand of that op.
func.func @warp_scf_for_broadcasted_result(%arg0: index) -> vector<1xf32> {
  // Loop bounds: iterate from 0 to 128 with step 1.
  %c128 = arith.constant 128 : index
  %c1 = arith.constant 1 : index
  %c0 = arith.constant 0 : index
  %2 = gpu.warp_execute_on_lane_0(%arg0)[32] -> (vector<1xf32>) {
    // Opaque producer of the initial loop-carried value; the op name must be
    // exactly "some_def" to match the CHECK-PROP pattern.
    %ini = "some_def"() : () -> (vector<1xf32>)
    %0 = scf.for %arg3 = %c0 to %c128 step %c1 iter_args(%arg4 = %ini) -> (vector<1xf32>) {
      // Opaque per-iteration update of the carried vector; the op name must be
      // exactly "some_op" to match the CHECK-PROP pattern.
      %1 = "some_op"(%arg4) : (vector<1xf32>) -> (vector<1xf32>)
      scf.yield %1 : vector<1xf32>
    }
    // The loop result is yielded out of the warp region as-is.
    gpu.yield %0 : vector<1xf32>
  }
  return %2 : vector<1xf32>
}
562+
534563// -----
535564
536565#map = affine_map <()[s0 ] -> (s0 * 4 )>
0 commit comments