|
1 |
| -// RUN: mlir-opt %s -convert-gpu-to-rocdl -split-input-file | FileCheck %s |
2 |
| -// RUN: mlir-opt %s -convert-gpu-to-rocdl='allowed-dialects=func,arith,math' -split-input-file | FileCheck %s |
3 |
| -// RUN: mlir-opt %s -convert-gpu-to-rocdl='index-bitwidth=32' -split-input-file | FileCheck --check-prefix=CHECK32 %s |
| 1 | +// RUN: mlir-opt %s -convert-gpu-to-rocdl='chipset=gfx950' -split-input-file | FileCheck %s |
| 2 | +// RUN: mlir-opt %s -convert-gpu-to-rocdl='chipset=gfx950 allowed-dialects=func,arith,math' -split-input-file | FileCheck %s |
| 3 | +// RUN: mlir-opt %s -convert-gpu-to-rocdl='chipset=gfx950 index-bitwidth=32' -split-input-file | FileCheck --check-prefix=CHECK32 %s |
4 | 4 |
|
5 | 5 | // CHECK-LABEL: @test_module
|
6 | 6 | // CHECK-SAME: llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
|
@@ -734,14 +734,40 @@ gpu.module @test_module {
|
734 | 734 | func.return %shfl, %shfli, %shflu, %shfld : f32, f32, f32, f32
|
735 | 735 | }
|
736 | 736 |
|
| 737 | + // CHECK-LABEL: func @gpu_shuffle_promote() |
| 738 | + func.func @gpu_shuffle_promote() -> (f32, f32, f32) { // Exercises three constant-offset xor shuffles of one f32 value and pins the rocdl op each is expected to lower to. |
| 739 | + // CHECK: %[[#VALUE:]] = llvm.mlir.constant(1.000000e+00 : f32) : f32 |
| 740 | + %arg0 = arith.constant 1.0 : f32 // value being shuffled in all three cases |
| 741 | + %arg1 = arith.constant 4 : i32 // xor offset for %shfl1 (expected: rocdl.ds_swizzle) |
| 742 | + %arg2 = arith.constant 16 : i32 // xor offset for %shfl2 (expected: rocdl.permlane16.swap) |
| 743 | + %arg3 = arith.constant 32 : i32 // xor offset for %shfl3 (expected: rocdl.permlane32.swap) |
| 744 | + %arg4 = arith.constant 64 : i32 // width operand shared by all three shuffles |
| 745 | + // CHECK: %[[#CAST_VALUE:]] = llvm.bitcast %[[#VALUE]] : f32 to i32 |
| 746 | + // CHECK: %[[#MASK:]] = llvm.mlir.constant(4127 : i32) : i32 |
| 747 | + // CHECK: %[[#PERMUTE:]] = rocdl.ds_swizzle %[[#CAST_VALUE]], %[[#MASK]] : (i32, i32) -> i32 |
| 748 | + // CHECK: %[[#CAST_SHFL_VALUE:]] = llvm.bitcast %[[#PERMUTE]] : i32 to f32 |
| 749 | + %shfl1, %pred1 = gpu.shuffle xor %arg0, %arg1, %arg4 : f32 // NOTE(review): mask 4127 == 0x101F; presumably (xor 4 << 10) | and-mask 0x1F - confirm against the swizzle encoding |
| 750 | + // CHECK: %[[#CAST_VALUE:]] = llvm.bitcast %[[#VALUE]] : f32 to i32 |
| 751 | + // CHECK: %[[#PERMUTE:]] = rocdl.permlane16.swap %[[#CAST_VALUE]], %[[#CAST_VALUE]], false, false : (i32, i32) -> <(i32, i32)> |
| 752 | + // CHECK: %[[#EXTRACT:]] = llvm.extractvalue %[[#PERMUTE]][0] : !llvm.struct<(i32, i32)> |
| 753 | + // CHECK: %[[#CAST_SHFL_VALUE:]] = llvm.bitcast %[[#EXTRACT]] : i32 to f32 |
| 754 | + %shfl2, %pred2 = gpu.shuffle xor %arg0, %arg2, %arg4 : f32 // the extractvalue operand above is a use of PERMUTE (no trailing colon), so it must be the swap result |
| 755 | + // CHECK: %[[#CAST_VALUE:]] = llvm.bitcast %[[#VALUE]] : f32 to i32 |
| 756 | + // CHECK: %[[#PERMUTE:]] = rocdl.permlane32.swap %[[#CAST_VALUE]], %[[#CAST_VALUE]], false, false : (i32, i32) -> <(i32, i32)> |
| 757 | + // CHECK: %[[#EXTRACT:]] = llvm.extractvalue %[[#PERMUTE]][0] : !llvm.struct<(i32, i32)> |
| 758 | + // CHECK: %[[#CAST_SHFL_VALUE:]] = llvm.bitcast %[[#EXTRACT]] : i32 to f32 |
| 759 | + %shfl3, %pred3 = gpu.shuffle xor %arg0, %arg3, %arg4 : f32 // same pattern as %shfl2, with the 32-lane swap |
| 760 | + func.return %shfl1, %shfl2, %shfl3 : f32, f32, f32 |
| 761 | + } |
| 762 | + |
737 | 763 | // CHECK-LABEL: func @gpu_shuffle_vec
|
738 | 764 | // CHECK-SAME: (%[[ARG:.*]]: vector<4xf16>, %{{.*}}: i32, %{{.*}}: i32)
|
739 | 765 | func.func @gpu_shuffle_vec(%arg0: vector<4xf16>, %arg1: i32, %arg2: i32) -> vector<4xf16> {
|
740 | 766 | // CHECK: %[[CAST1:.*]] = llvm.bitcast %[[ARG]] : vector<4xf16> to vector<2xi32>
|
741 | 767 | // CHECK: %[[IDX0:.*]] = llvm.mlir.constant(0 : i32) : i32
|
742 |
| - // CHECK: %[[ELEM0:.*]] = llvm.extractelement %13[%[[IDX0]] : i32] : vector<2xi32> |
| 768 | + // CHECK: %[[ELEM0:.*]] = llvm.extractelement %[[CAST1]][%[[IDX0]] : i32] : vector<2xi32> |
743 | 769 | // CHECK: %[[IDX1:.*]] = llvm.mlir.constant(1 : i32) : i32
|
744 |
| - // CHECK: %[[ELEM1:.*]] = llvm.extractelement %13[%[[IDX1]] : i32] : vector<2xi32> |
| 770 | + // CHECK: %[[ELEM1:.*]] = llvm.extractelement %[[CAST1]][%[[IDX1]] : i32] : vector<2xi32> |
745 | 771 | // CHECK: %[[PERM0:.*]] = rocdl.ds_bpermute %{{.*}}, %[[ELEM0]] : (i32, i32) -> i32
|
746 | 772 | // CHECK: %[[PERM1:.*]] = rocdl.ds_bpermute %{{.*}}, %[[ELEM1]] : (i32, i32) -> i32
|
747 | 773 | // CHECK: %[[V0:.*]] = llvm.mlir.poison : vector<2xi32>
|
|
0 commit comments