|
1 | 1 | // RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx950 | FileCheck %s |
2 | | - |
3 | | -#gpu_lds_addrspace = 3 |
4 | | -#amdgpu_fat_buffer_addrspace = 7 |
| 2 | +// RUN: not mlir-opt %s --split-input-file -convert-amdgpu-to-rocdl=chipset=gfx945 2>&1 | FileCheck %s --check-prefix=CHECK-OLD |
5 | 3 |
|
// Lowering test: amdgpu.transpose_load from a 128x72 f16 memref in address
// space 3 into vector<4xf16>. The gfx950 run expects rocdl.ds.read.tr16.b64
// in the output; the gfx945 run (CHECK-OLD prefix) expects the conversion to
// fail with the "Non-gfx950 chipset not supported" diagnostic instead.
// This change replaces the #gpu_lds_addrspace alias with its literal value 3.
6 | 4 | // CHECK-LABEL: func @transpose_load_to_rocdl_4xf16
7 | | -func.func @transpose_load_to_rocdl_4xf16(%idx1 : index, %idx2 : index, %wgmem : memref<128x72xf16, #gpu_lds_addrspace>) -> vector<4xf16> {
| 5 | +func.func @transpose_load_to_rocdl_4xf16(%idx1 : index, %idx2 : index, %wgmem : memref<128x72xf16, 3>) -> vector<4xf16> {
8 | 6 | // CHECK: rocdl.ds.read.tr16.b64
9 | | - %0 = amdgpu.transpose_load %wgmem[%idx1, %idx2] : memref<128x72xf16, #gpu_lds_addrspace> -> vector<4xf16>
| 7 | + // CHECK-OLD: error: 'amdgpu.transpose_load' op Non-gfx950 chipset not supported
| 8 | + %0 = amdgpu.transpose_load %wgmem[%idx1, %idx2] : memref<128x72xf16, 3> -> vector<4xf16>
10 | 9 | return %0 : vector<4xf16>
11 | 10 | }
12 | 11 |
|
| 12 | +// ----- |
| 13 | + |
// Lowering test: amdgpu.transpose_load from a 128x128 i8 memref in address
// space 3 into vector<8xi8>. The gfx950 run expects rocdl.ds.read.tr8.b64
// in the output; the gfx945 run (CHECK-OLD prefix) expects the conversion to
// fail with the "Non-gfx950 chipset not supported" diagnostic instead.
// This change replaces the #gpu_lds_addrspace alias with its literal value 3.
13 | 14 | // CHECK-LABEL: func @transpose_load_to_rocdl_8xi8
14 | | -func.func @transpose_load_to_rocdl_8xi8(%idx1 : index, %idx2 : index, %wgmem : memref<128x128xi8, #gpu_lds_addrspace>) -> vector<8xi8> {
| 15 | +func.func @transpose_load_to_rocdl_8xi8(%idx1 : index, %idx2 : index, %wgmem : memref<128x128xi8, 3>) -> vector<8xi8> {
15 | 16 | // CHECK: rocdl.ds.read.tr8.b64
16 | | - %0 = amdgpu.transpose_load %wgmem[%idx1, %idx2] : memref<128x128xi8, #gpu_lds_addrspace> -> vector<8xi8>
| 17 | + // CHECK-OLD: error: 'amdgpu.transpose_load' op Non-gfx950 chipset not supported
| 18 | + %0 = amdgpu.transpose_load %wgmem[%idx1, %idx2] : memref<128x128xi8, 3> -> vector<8xi8>
17 | 19 | return %0 : vector<8xi8>
18 | 20 | }
19 | 21 |
|
| 22 | +// ----- |
| 23 | + |
// Lowering test: amdgpu.transpose_load from a 128x16 i4 memref in address
// space 3 into vector<16xi4>. The gfx950 run expects rocdl.ds.read.tr4.b64
// in the output; the gfx945 run (CHECK-OLD prefix) expects the conversion to
// fail with the "Non-gfx950 chipset not supported" diagnostic instead.
// This change replaces the #gpu_lds_addrspace alias with its literal value 3.
20 | 24 | // CHECK-LABEL: func @transpose_load_to_rocdl_16xi4
21 | | -func.func @transpose_load_to_rocdl_16xi4(%idx1 : index, %idx2 : index, %wgmem : memref<128x16xi4, #gpu_lds_addrspace>) -> vector<16xi4> {
| 25 | +func.func @transpose_load_to_rocdl_16xi4(%idx1 : index, %idx2 : index, %wgmem : memref<128x16xi4, 3>) -> vector<16xi4> {
22 | 26 | // CHECK: rocdl.ds.read.tr4.b64
23 | | - %0 = amdgpu.transpose_load %wgmem[%idx1, %idx2] : memref<128x16xi4, #gpu_lds_addrspace> -> vector<16xi4>
| 27 | + // CHECK-OLD: error: 'amdgpu.transpose_load' op Non-gfx950 chipset not supported
| 28 | + %0 = amdgpu.transpose_load %wgmem[%idx1, %idx2] : memref<128x16xi4, 3> -> vector<16xi4>
24 | 29 | return %0 : vector<16xi4>
25 | 30 | }
26 | 31 |
|
| 32 | +// ----- |
| 33 | + |
// Lowering test: amdgpu.transpose_load from a 128x32 i32 memref in address
// space 3 into vector<3xi32>. The gfx950 run expects rocdl.ds.read.tr6.b96
// in the output. The gfx945 run (CHECK-OLD prefix) is expected to fail with
// the "Non-gfx950 chipset not supported" diagnostic; the directive for that
// was missing from this case only and is added here to match the other
// transpose_load tests in this file.
// NOTE(review): assumes the chipset check fires regardless of element type,
// as it does for the f16/i8/i4 cases -- confirm against the lowering pattern.
27 | 34 | // CHECK-LABEL: func @transpose_load_to_rocdl_3xi32
28 | | -func.func @transpose_load_to_rocdl_3xi32(%idx1 : index, %idx2 : index, %wgmem : memref<128x32xi32, #gpu_lds_addrspace>) -> vector<3xi32> {
| 35 | +func.func @transpose_load_to_rocdl_3xi32(%idx1 : index, %idx2 : index, %wgmem : memref<128x32xi32, 3>) -> vector<3xi32> {
29 | 36 | // CHECK: rocdl.ds.read.tr6.b96
30 | | - %0 = amdgpu.transpose_load %wgmem[%idx1, %idx2] : memref<128x32xi32, #gpu_lds_addrspace> -> vector<3xi32>
| 37 | + // CHECK-OLD: error: 'amdgpu.transpose_load' op Non-gfx950 chipset not supported
| 38 | + %0 = amdgpu.transpose_load %wgmem[%idx1, %idx2] : memref<128x32xi32, 3> -> vector<3xi32>
31 | 39 | return %0 : vector<3xi32>
32 | 40 | }
0 commit comments