@@ -338,6 +338,64 @@ gpu.module @test {
338338 }
339339}
340340
341+ // -----
342+ // CHECK-LABEL: gpu.func @scatter_ops_scf_yield({{.*}}) {
343+ // CHECK: %[[DEFAULT:.*]] = arith.constant dense<1.200000e+01> : vector<8xf16>
344+ // CHECK: %[[OFFSET:.*]] = arith.constant dense<12> : vector<1xindex>
345+ // CHECK: %[[MASK:.*]] = arith.constant dense<true> : vector<1xi1>
346+ // CHECK: %[[PREDICATE:.*]] = llvm.mlir.poison : i1
347+ // CHECK: %[[PREDICATED_LOAD:.*]] = scf.if %[[PREDICATE]] -> (vector<8xf16>) {
348+ // CHECK-NEXT: %[[LOADED:.*]] = xegpu.load %arg0[%[[OFFSET]]], %[[MASK]] <{chunk_size = 8 : i64}> : memref<256xf16>, vector<1xindex>, vector<1xi1> -> vector<8xf16>
349+ // CHECK-NEXT: scf.yield %[[LOADED]] : vector<8xf16>
350+ // CHECK-NEXT: } else {
351+ // CHECK-NEXT: scf.yield %[[DEFAULT]] : vector<8xf16>
352+ // CHECK-NEXT: }
353+ // CHECK-NEXT: xegpu.store %[[PREDICATED_LOAD]], %arg0[%[[OFFSET]]], %[[MASK]] <{chunk_size = 8 : i64}> : vector<8xf16>, memref<256xf16>, vector<1xindex>, vector<1xi1>
354+ gpu.module @test { // Module wrapping the single test function below.
355+ gpu.func @scatter_ops_scf_yield (%src: memref <256 xf16 >) { // Chunked load inside an scf.if whose value is yielded out of the region and then stored back to %src.
356+ %pred = llvm.mlir.poison : i1 // Branch condition is a poison i1 (no defined value); NOTE(review): presumably chosen so the scf.if cannot be folded away -- confirm.
357+ %1 = arith.constant {layout_result_0 = #xegpu.layout <lane_layout = [16 ], lane_data = [1 ]>} dense <1 >: vector <16 xi1 > // Mask: all 16 elements true; the expected output above shows it as vector<1xi1>.
358+ %offset = arith.constant {layout_result_0 = #xegpu.layout <lane_layout = [16 ], lane_data = [1 ]>} dense <12 > : vector <16 xindex > // Offsets: all 16 elements are 12; the expected output above shows it as vector<1xindex>.
359+ %loaded = scf.if %pred -> (vector <16 x8 xf16 >) { // Both branches must yield a vector<16x8xf16>; the expected output shows the result as vector<8xf16>.
360+ %3 = xegpu.load %src [%offset ], %1 <{chunk_size =8 }> { // Then-branch: masked load from %src at %offset with chunk_size = 8.
361+ layout_result_0 = #xegpu.layout <lane_layout = [16 , 1 ], lane_data = [1 , 2 ]>
362+ } : memref <256 xf16 >, vector <16 xindex >, vector <16 xi1 > -> vector <16 x8 xf16 >
363+ scf.yield %3 : vector <16 x8 xf16 >
364+ } else {
365+ %3 = arith.constant { // Else-branch: splat constant 12.0 carrying the same layout attribute as the load result.
366+ layout_result_0 = #xegpu.layout <lane_layout = [16 , 1 ], lane_data = [1 , 2 ]>
367+ } dense <12. > : vector <16 x8 xf16 >
368+ scf.yield %3 : vector <16 x8 xf16 >
369+ } { layout_result_0 = #xegpu.layout <lane_layout = [16 , 1 ], lane_data = [1 , 2 ]> } // Layout attribute on the scf.if op itself; identical to the layout on both yielded values.
370+ xegpu.store %loaded , %src [%offset ], %1 <{chunk_size =8 }> : vector <16 x8 xf16 >, memref <256 xf16 >, vector <16 xindex >, vector <16 xi1 > // Stores the scf.if result to %src using the same offsets and mask; this op carries no layout attribute.
371+ gpu.return
372+ }
373+ }
374+
375+ // -----
376+ // CHECK-LABEL: gpu.func @scatter_ops_scf_non_yield({{.*}}) {
377+ // CHECK: %[[OFFSET:.*]] = arith.constant dense<12> : vector<1xindex>
378+ // CHECK: %[[MASK:.*]] = arith.constant dense<true> : vector<1xi1>
379+ // CHECK: %[[PREDICATE:.*]] = llvm.mlir.poison : i1
380+ // CHECK: scf.if %[[PREDICATE]] {
381+ // CHECK-NEXT: %[[LOADED:.*]] = xegpu.load %arg0[%[[OFFSET]]], %[[MASK]] <{chunk_size = 8 : i64}> : memref<256xf16>, vector<1xindex>, vector<1xi1> -> vector<8xf16>
382+ // CHECK-NEXT: xegpu.store %[[LOADED]], %arg0[%[[OFFSET]]], %[[MASK]] <{chunk_size = 8 : i64}> : vector<8xf16>, memref<256xf16>, vector<1xindex>, vector<1xi1>
383+ // CHECK-NEXT: }
384+ gpu.module @test { // Module wrapping the single test function below.
385+ gpu.func @scatter_ops_scf_non_yield (%src: memref <256 xf16 >) { // Chunked load and store both live inside a result-less scf.if; nothing is yielded out of the region.
386+ %pred = llvm.mlir.poison : i1 // Branch condition is a poison i1 (no defined value); NOTE(review): presumably chosen so the scf.if cannot be folded away -- confirm.
387+ %1 = arith.constant {layout_result_0 = #xegpu.layout <lane_layout = [16 ], lane_data = [1 ]>} dense <1 >: vector <16 xi1 > // Mask: all 16 elements true; the expected output above shows it as vector<1xi1>.
388+ %offset = arith.constant {layout_result_0 = #xegpu.layout <lane_layout = [16 ], lane_data = [1 ]>} dense <12 > : vector <16 xindex > // Offsets: all 16 elements are 12; the expected output above shows it as vector<1xindex>.
389+ scf.if %pred { // No result types and no else region.
390+ %3 = xegpu.load %src [%offset ], %1 <{chunk_size =8 }> { // Masked load from %src at %offset with chunk_size = 8; the expected output shows the result as vector<8xf16>.
391+ layout_result_0 = #xegpu.layout <lane_layout = [16 , 1 ], lane_data = [1 , 2 ]>
392+ } : memref <256 xf16 >, vector <16 xindex >, vector <16 xi1 > -> vector <16 x8 xf16 >
393+ xegpu.store %3 , %src [%offset ], %1 <{chunk_size =8 }> : vector <16 x8 xf16 >, memref <256 xf16 >, vector <16 xindex >, vector <16 xi1 > // Stores the loaded value straight back using the same offsets and mask; this op carries no layout attribute.
394+ }
395+ gpu.return
396+ }
397+ }
398+
341399// -----
342400// CHECK-LABEL: gpu.func @scatter_ops({{.*}}) {
343401// CHECK: %[[MASK:.*]] = arith.constant dense<true> : vector<1xi1>
0 commit comments