@@ -34,6 +34,27 @@ func.func @transpose_load_to_rocdl_16xi4(%idx1 : index, %idx2 : index, %wgmem :
// Test case: amdgpu.transpose_load reading a vector<3xi32> from a
// memref<128x32xi32> in memory space 3, indexed by two `index` arguments.
// The default FileCheck prefix expects rocdl.ds.read.tr6.b96 in the output.
// The -OLD prefix expects the op to be rejected with the "Non-gfx950 chipset
// not supported" diagnostic (the RUN lines that select each prefix are not
// part of this excerpt).
// CHECK-LABEL: func @transpose_load_to_rocdl_3xi32
func.func @transpose_load_to_rocdl_3xi32(%idx1 : index, %idx2 : index, %wgmem : memref<128x32xi32, 3>) -> vector<3xi32> {
  // CHECK: rocdl.ds.read.tr6.b96
  // CHECK-OLD: error: 'amdgpu.transpose_load' op Non-gfx950 chipset not supported
  %0 = amdgpu.transpose_load %wgmem[%idx1, %idx2] : memref<128x32xi32, 3> -> vector<3xi32>
  return %0 : vector<3xi32>
}
41+
// -----
43+
// Test case: amdgpu.transpose_load reading a vector<16xi4> from a
// memref<128x32xi8> in memory space 3, i.e. the result element type (i4)
// differs from the memref element type (i8).
// The default FileCheck prefix expects rocdl.ds.read.tr4.b64 in the output.
// The -OLD prefix expects the "Non-gfx950 chipset not supported" diagnostic.
// NOTE(review): the function name ends in `memrefxi1`, but the memref element
// type is i8 -- confirm whether `memrefxi8` was intended.
// CHECK-LABEL: func @transpose_load_to_rocdl_i4_memrefxi1
func.func @transpose_load_to_rocdl_i4_memrefxi1(%idx1 : index, %idx2 : index, %wgmem : memref<128x32xi8, 3>) -> vector<16xi4> {
  // CHECK: rocdl.ds.read.tr4.b64
  // CHECK-OLD: error: 'amdgpu.transpose_load' op Non-gfx950 chipset not supported
  %0 = amdgpu.transpose_load %wgmem[%idx1, %idx2] : memref<128x32xi8, 3> -> vector<16xi4>
  return %0 : vector<16xi4>
}
51+
// -----
53+
// Test case: amdgpu.transpose_load reading a vector<3xi32> from a
// memref<128x32xi8> in memory space 3, i.e. the result element type (i32)
// differs from the memref element type (i8).
// The default FileCheck prefix expects rocdl.ds.read.tr6.b96 in the output.
// The -OLD prefix expects the "Non-gfx950 chipset not supported" diagnostic.
// NOTE(review): the function name ends in `memrefxi1`, but the memref element
// type is i8 -- confirm whether `memrefxi8` was intended.
// CHECK-LABEL: func @transpose_load_to_rocdl_i6_memrefxi1
func.func @transpose_load_to_rocdl_i6_memrefxi1(%idx1 : index, %idx2 : index, %wgmem : memref<128x32xi8, 3>) -> vector<3xi32> {
  // CHECK: rocdl.ds.read.tr6.b96
  // CHECK-OLD: error: 'amdgpu.transpose_load' op Non-gfx950 chipset not supported
  %0 = amdgpu.transpose_load %wgmem[%idx1, %idx2] : memref<128x32xi8, 3> -> vector<3xi32>
  return %0 : vector<3xi32>
}
0 commit comments