|
1 | | -// RUN: triton-opt %s -split-input-file --intel-allocate-shared-memory --convert-triton-intel-gpu-to-llvm --convert-tritongen-to-llvm | FileCheck %s --implicit-check-not=llvm.inline_asm --dump-input-context=20 --check-prefixes=CHECK,NO-PREDICATED |
2 | | -// RUN: env TRITON_INTEL_PREDICATED_LOAD=1 triton-opt %s -split-input-file --intel-allocate-shared-memory --convert-triton-intel-gpu-to-llvm --convert-tritongen-to-llvm | FileCheck %s --implicit-check-not=llvm.inline_asm --dump-input-context=20 --check-prefixes=CHECK,PREDICATED |
| 1 | +// RUN: env TRITON_INTEL_PREDICATED=0 triton-opt %s -split-input-file --intel-allocate-shared-memory --convert-triton-intel-gpu-to-llvm --convert-tritongen-to-llvm | FileCheck %s --implicit-check-not=llvm.inline_asm --dump-input-context=20 --check-prefixes=CHECK,NO-PREDICATED |
| 2 | +// RUN: env TRITON_INTEL_PREDICATED=1 triton-opt %s -split-input-file --intel-allocate-shared-memory --convert-triton-intel-gpu-to-llvm --convert-tritongen-to-llvm | FileCheck %s --implicit-check-not=llvm.inline_asm --dump-input-context=20 --check-prefixes=CHECK,PREDICATED |
3 | 3 |
|
4 | 4 | module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32} { |
5 | 5 | // CHECK: llvm.func spir_kernelcc @test_empty_kernel(%arg0: i64, %arg1: !llvm.ptr<1>, %arg2: !llvm.ptr<1>, %arg3: !llvm.ptr<1>) |
@@ -694,21 +694,27 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32} { |
694 | 694 | // CHECK-NEXT: [[VEC2:%.*]] = llvm.mlir.undef : vector<1xi32> |
695 | 695 | // CHECK-NEXT: [[ZERO:%.*]] = llvm.mlir.constant(0 : i32) : i32 |
696 | 696 | // CHECK-NEXT: [[IE2:%.*]] = llvm.insertelement [[BCAST1]], [[VEC2]][[[ZERO]] : i32] : vector<1xi32> |
697 | | - // CHECK-NEXT: llvm.cond_br [[ARG2_0]], ^bb1, ^bb2 |
698 | | - // CHECK-NEXT: ^bb1: |
699 | | - // CHECK-NEXT: [[BCAST2:%.*]] = llvm.bitcast [[ARG0_0]] : !llvm.ptr<1> to !llvm.ptr<1> |
700 | | - // CHECK-NEXT: llvm.store [[IE2]], [[BCAST2]] {alignment = 4 : i64} : vector<1xi32>, !llvm.ptr<1> |
701 | | - // CHECK-NEXT: llvm.br ^bb2 |
702 | | - // CHECK-NEXT: ^bb2: |
| 697 | + // CHECK-NEXT: [[BCAST2:%.*]] = llvm.bitcast [[ARG0_0]] : !llvm.ptr<1> to !llvm.ptr<1> |
| 698 | + // PREDICATED-NEXT: [[BCAST3:%.*]] = llvm.bitcast [[IE2]] : vector<1xi32> to vector<1xf32> |
| 699 | + // PREDICATED: [[ALIGNMENT:%.*]] = llvm.mlir.constant(4 : i64) : i64 |
| 700 | + // PREDICATED: llvm.call spir_funccc @llvm.genx.GenISA.PredicatedStore.p1f32.v1f32([[BCAST2]], [[BCAST3]], [[ALIGNMENT]], [[ARG2_0]]) {{.*}} : (!llvm.ptr<1>, vector<1xf32>, i64, i1) -> () |
| 701 | + // NO-PREDICATED: llvm.cond_br [[ARG2_0]], ^bb1, ^bb2 |
| 702 | + // NO-PREDICATED-NEXT: ^bb1: |
| 703 | + // NO-PREDICATED-NEXT: llvm.store [[IE2]], [[BCAST2]] {alignment = 4 : i64} : vector<1xi32>, !llvm.ptr<1> |
| 704 | + // NO-PREDICATED-NEXT: llvm.br ^bb2 |
| 705 | + // NO-PREDICATED-NEXT: ^bb2: |
703 | 706 | // CHECK: [[VEC3:%.*]] = llvm.mlir.undef : vector<1xi32> |
704 | 707 | // CHECK-NEXT: [[ZERO:%.*]] = llvm.mlir.constant(0 : i32) : i32 |
705 | 708 | // CHECK-NEXT: [[IE3:%.*]] = llvm.insertelement {{.*}}, [[VEC3]][[[ZERO]] : i32] : vector<1xi32> |
706 | | - // CHECK: llvm.cond_br [[ARG2_1]], ^bb3, ^bb4 |
707 | | - // CHECK-NEXT: ^bb3: |
708 | 709 | // CHECK-NEXT: [[BCAST2:%.*]] = llvm.bitcast [[ARG0_1]] : !llvm.ptr<1> to !llvm.ptr<1> |
709 | | - // CHECK-NEXT: llvm.store [[IE3]], [[BCAST2]] {alignment = 4 : i64} : vector<1xi32>, !llvm.ptr<1> |
710 | | - // CHECK-NEXT: llvm.br ^bb4 |
711 | | - // CHECK-NEXT: ^bb4: |
| 710 | + // PREDICATED-NEXT: [[BCAST3:%.*]] = llvm.bitcast [[IE3]] : vector<1xi32> to vector<1xf32> |
| 711 | + // PREDICATED: [[ALIGNMENT:%.*]] = llvm.mlir.constant(4 : i64) : i64 |
| 712 | + // PREDICATED: llvm.call spir_funccc @llvm.genx.GenISA.PredicatedStore.p1f32.v1f32([[BCAST2]], [[BCAST3]], [[ALIGNMENT]], [[ARG2_1]]) {{.*}} : (!llvm.ptr<1>, vector<1xf32>, i64, i1) -> () |
| 713 | + // NO-PREDICATED: llvm.cond_br [[ARG2_1]], ^bb3, ^bb4 |
| 714 | + // NO-PREDICATED-NEXT: ^bb3: |
| 715 | + // NO-PREDICATED-NEXT: llvm.store [[IE3]], [[BCAST2]] {alignment = 4 : i64} : vector<1xi32>, !llvm.ptr<1> |
| 716 | + // NO-PREDICATED-NEXT: llvm.br ^bb4 |
| 717 | + // NO-PREDICATED-NEXT: ^bb4: |
712 | 718 | tt.store %ptrs, %vals, %mask : tensor<256x!tt.ptr<f32>, #blocked0> |
713 | 719 | tt.return |
714 | 720 | } |
@@ -1345,10 +1351,11 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32} { |
1345 | 1351 | // CHECK-LABEL: store_f32_scalar |
1346 | 1352 | tt.func @store_f32_scalar(%arg0 : !tt.ptr<f32>, %arg1 : f32) { |
1347 | 1353 | // CHECK: llvm.icmp "eq" |
1348 | | - // CHECK: llvm.cond_br {{.*}}, ^bb1, ^bb2 |
1349 | | - // CHECK-NEXT: ^bb1: |
1350 | | - // CHECK-NEXT: [[BCAST:%.*]] = llvm.bitcast %arg0 : !llvm.ptr<1> to !llvm.ptr<1> |
1351 | | - // CHECK-NEXT: llvm.store {{.*}}, [[BCAST]] {alignment = 4 : i64} : vector<1xi32>, !llvm.ptr<1> |
| 1354 | + // CHECK: [[BCAST:%.*]] = llvm.bitcast %arg0 : !llvm.ptr<1> to !llvm.ptr<1> |
| 1355 | + // PREDICATED: llvm.call spir_funccc @llvm.genx.GenISA.PredicatedStore.p1f32.v1f32([[BCAST]], {{.*}}) {{.*}} : (!llvm.ptr<1>, vector<1xf32>, i64, i1) -> () |
| 1356 | + // NO-PREDICATED: llvm.cond_br {{.*}}, ^bb1, ^bb2 |
| 1357 | + // NO-PREDICATED-NEXT: ^bb1: |
| 1358 | + // NO-PREDICATED-NEXT: llvm.store {{.*}}, [[BCAST]] {alignment = 4 : i64} : vector<1xi32>, !llvm.ptr<1> |
1352 | 1359 | tt.store %arg0, %arg1 : !tt.ptr<f32> |
1353 | 1360 | tt.return |
1354 | 1361 | } |
|
0 commit comments