|
24 | 24 | ret void
|
25 | 25 | }
|
26 | 26 |
|
| | +; Negative test (wmma.scale): operand 1 (%A) is <16 x i64>. The expected diagnostic requires an i32 vector and echoes the call and the offending operand. |
| 27 | +; CHECK: operand 1 must be 8, 12 or 16 element i32 vector |
| 28 | +; CHECK-NEXT: %res = call <8 x float> @llvm.amdgcn.wmma.scale.f32.16x16x128.f8f6f4.v8f32.v16i64.v16i32(i32 0, <16 x i64> %A, i32 0, <16 x i32> %B, i16 0, <8 x float> %C, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false) |
| 29 | +; CHECK-NEXT: <16 x i64> %A |
| 30 | +define amdgpu_ps void @test_wmma_scale_f32_16x16x128_f8f6f4___v16i64_fp8___v16i32_fp8(<16 x i64> %A, <16 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) { |
| 31 | +bb: |
| 32 | + %res = call <8 x float> @llvm.amdgcn.wmma.scale.f32.16x16x128.f8f6f4.v8f32.v16i64.v16i32(i32 0, <16 x i64> %A, i32 0, <16 x i32> %B, i16 0, <8 x float> %C, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false) |
| 33 | + store <8 x float> %res, ptr addrspace(1) %out |
| 34 | + ret void |
| 35 | +} |
| 36 | + |
| | +; Negative test (wmma.scale): operand 3 (%B) is <16 x i64>. The expected diagnostic requires an i32 vector and echoes the call and the offending operand. |
| 37 | +; CHECK: operand 3 must be 8, 12 or 16 element i32 vector |
| 38 | +; CHECK-NEXT: %res = call <8 x float> @llvm.amdgcn.wmma.scale.f32.16x16x128.f8f6f4.v8f32.v16i32.v16i64(i32 0, <16 x i32> %A, i32 0, <16 x i64> %B, i16 0, <8 x float> %C, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false) |
| 39 | +; CHECK-NEXT: <16 x i64> %B |
| 40 | +define amdgpu_ps void @test_wmma_scale_f32_16x16x128_f8f6f4___v16i32_fp8___v16i64_fp8(<16 x i32> %A, <16 x i64> %B, <8 x float> %C, ptr addrspace(1) %out) { |
| 41 | +bb: |
| 42 | + %res = call <8 x float> @llvm.amdgcn.wmma.scale.f32.16x16x128.f8f6f4.v8f32.v16i32.v16i64(i32 0, <16 x i32> %A, i32 0, <16 x i64> %B, i16 0, <8 x float> %C, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false) |
| 43 | + store <8 x float> %res, ptr addrspace(1) %out |
| 44 | + ret void |
| 45 | +} |
| 46 | + |
| | +; Negative test (wmma.scale16, i64 scale operands): operand 1 (%A) is <16 x i64>. The expected diagnostic requires an i32 vector. |
| 47 | +; CHECK: operand 1 must be 8, 12 or 16 element i32 vector |
| 48 | +; CHECK-NEXT: %res = call <8 x float> @llvm.amdgcn.wmma.scale16.f32.16x16x128.f8f6f4.v8f32.v16i64.v16i32(i32 0, <16 x i64> %A, i32 0, <16 x i32> %B, i16 0, <8 x float> %C, i32 0, i32 0, i64 0, i32 0, i32 0, i64 0, i1 false, i1 false) |
| 49 | +; CHECK-NEXT: <16 x i64> %A |
| 50 | +define amdgpu_ps void @test_wmma_scale16_f32_16x16x128_f8f6f4___v16i64_fp8___v16i32_fp8(<16 x i64> %A, <16 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) { |
| 51 | +bb: |
| 52 | + %res = call <8 x float> @llvm.amdgcn.wmma.scale16.f32.16x16x128.f8f6f4.v8f32.v16i64.v16i32(i32 0, <16 x i64> %A, i32 0, <16 x i32> %B, i16 0, <8 x float> %C, i32 0, i32 0, i64 0, i32 0, i32 0, i64 0, i1 false, i1 false) |
| 53 | + store <8 x float> %res, ptr addrspace(1) %out |
| 54 | + ret void |
| 55 | +} |
| 56 | + |
| | +; Negative test (wmma.scale16, i64 scale operands): operand 3 (%B) is <16 x i64>. The expected diagnostic requires an i32 vector. |
| 57 | +; CHECK: operand 3 must be 8, 12 or 16 element i32 vector |
| 58 | +; CHECK-NEXT: %res = call <8 x float> @llvm.amdgcn.wmma.scale16.f32.16x16x128.f8f6f4.v8f32.v16i32.v16i64(i32 0, <16 x i32> %A, i32 0, <16 x i64> %B, i16 0, <8 x float> %C, i32 0, i32 0, i64 0, i32 0, i32 0, i64 0, i1 false, i1 false) |
| 59 | +; CHECK-NEXT: <16 x i64> %B |
| 60 | +define amdgpu_ps void @test_wmma_scale16_f32_16x16x128_f8f6f4___v16i32_fp8___v16i64_fp8(<16 x i32> %A, <16 x i64> %B, <8 x float> %C, ptr addrspace(1) %out) { |
| 61 | +bb: |
| 62 | + %res = call <8 x float> @llvm.amdgcn.wmma.scale16.f32.16x16x128.f8f6f4.v8f32.v16i32.v16i64(i32 0, <16 x i32> %A, i32 0, <16 x i64> %B, i16 0, <8 x float> %C, i32 0, i32 0, i64 0, i32 0, i32 0, i64 0, i1 false, i1 false) |
| 63 | + store <8 x float> %res, ptr addrspace(1) %out |
| 64 | + ret void |
| 65 | +} |
| 66 | + |
27 | 67 | ; --------------------------------------------------------------------
|
28 | 68 | ; Impossible vector types
|
29 | 69 | ; --------------------------------------------------------------------
|
|
48 | 88 | ret void
|
49 | 89 | }
|
50 | 90 |
|
| | +; Negative test (wmma.scale): operand 1 (%A) is <15 x i32>; 15 is not one of the accepted element counts (8, 12 or 16). |
| 91 | +; CHECK: operand 1 must be 8, 12 or 16 element i32 vector |
| 92 | +; CHECK-NEXT: call <8 x float> @llvm.amdgcn.wmma.scale.f32.16x16x128.f8f6f4.v8f32.v15i32.v16i32(i32 0, <15 x i32> %A, i32 0, <16 x i32> %B, i16 0, <8 x float> %C, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false) |
| 93 | +; CHECK-NEXT: <15 x i32> %A |
| 94 | +define amdgpu_ps void @test_wmma_scale_f32_16x16x128_f8f6f4___v15i32_fp8___v16i32_fp8(<15 x i32> %A, <16 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) { |
| 95 | +bb: |
| 96 | + %res = call <8 x float> @llvm.amdgcn.wmma.scale.f32.16x16x128.f8f6f4.v8f32.v15i32.v16i32(i32 0, <15 x i32> %A, i32 0, <16 x i32> %B, i16 0, <8 x float> %C, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false) |
| 97 | + store <8 x float> %res, ptr addrspace(1) %out |
| 98 | + ret void |
| 99 | +} |
| 100 | + |
| | +; Negative test (wmma.scale): operand 3 (%B) is <15 x i32>; 15 is not one of the accepted element counts (8, 12 or 16). |
| 101 | +; CHECK: operand 3 must be 8, 12 or 16 element i32 vector |
| 102 | +; CHECK-NEXT: call <8 x float> @llvm.amdgcn.wmma.scale.f32.16x16x128.f8f6f4.v8f32.v16i32.v15i32(i32 0, <16 x i32> %A, i32 0, <15 x i32> %B, i16 0, <8 x float> %C, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false) |
| 103 | +; CHECK-NEXT: <15 x i32> %B |
| 104 | +define amdgpu_ps void @test_wmma_scale_f32_16x16x128_f8f6f4___v16i32_fp8___v15i32_fp8(<16 x i32> %A, <15 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) { |
| 105 | +bb: |
| 106 | + %res = call <8 x float> @llvm.amdgcn.wmma.scale.f32.16x16x128.f8f6f4.v8f32.v16i32.v15i32(i32 0, <16 x i32> %A, i32 0, <15 x i32> %B, i16 0, <8 x float> %C, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false) |
| 107 | + store <8 x float> %res, ptr addrspace(1) %out |
| 108 | + ret void |
| 109 | +} |
| 110 | + |
51 | 111 | ; --------------------------------------------------------------------
|
52 | 112 | ; Out of bounds format
|
53 | 113 | ; --------------------------------------------------------------------
|
|
72 | 132 | ret void
|
73 | 133 | }
|
74 | 134 |
|
| | +; Negative test (wmma.scale): operand 0, the format argument paired with %A, is i32 5. The expected diagnostic rejects it as an invalid matrix format and echoes the value. |
| 135 | +; CHECK: invalid value for matrix format |
| 136 | +; CHECK-NEXT: %res = call <8 x float> @llvm.amdgcn.wmma.scale.f32.16x16x128.f8f6f4.v8f32.v16i32.v16i32(i32 5, <16 x i32> %A, i32 0, <16 x i32> %B, i16 0, <8 x float> %C, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false) |
| 137 | +; CHECK-NEXT: i32 5 |
| 138 | +define amdgpu_ps void @test_wmma_scale_f32_16x16x128_f8f6f4___v16i32_invalid0___v16i32_fp8(<16 x i32> %A, <16 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) { |
| 139 | +bb: |
| 140 | + %res = call <8 x float> @llvm.amdgcn.wmma.scale.f32.16x16x128.f8f6f4.v8f32.v16i32.v16i32(i32 5, <16 x i32> %A, i32 0, <16 x i32> %B, i16 0, <8 x float> %C, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false) |
| 141 | + store <8 x float> %res, ptr addrspace(1) %out |
| 142 | + ret void |
| 143 | +} |
| 144 | + |
| | +; Negative test (wmma.scale): operand 2, the format argument paired with %B, is i32 5. The expected diagnostic rejects it as an invalid matrix format and echoes the value. |
| 145 | +; CHECK: invalid value for matrix format |
| 146 | +; CHECK-NEXT: %res = call <8 x float> @llvm.amdgcn.wmma.scale.f32.16x16x128.f8f6f4.v8f32.v16i32.v16i32(i32 0, <16 x i32> %A, i32 5, <16 x i32> %B, i16 0, <8 x float> %C, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false) |
| 147 | +; CHECK-NEXT: i32 5 |
| 148 | +define amdgpu_ps void @test_wmma_scale_f32_16x16x128_f8f6f4___v16i32_fp8___v16i32_invalid1(<16 x i32> %A, <16 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) { |
| 149 | +bb: |
| 150 | + %res = call <8 x float> @llvm.amdgcn.wmma.scale.f32.16x16x128.f8f6f4.v8f32.v16i32.v16i32(i32 0, <16 x i32> %A, i32 5, <16 x i32> %B, i16 0, <8 x float> %C, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false) |
| 151 | + store <8 x float> %res, ptr addrspace(1) %out |
| 152 | + ret void |
| 153 | +} |
| 154 | + |
75 | 155 | ; --------------------------------------------------------------------
|
76 | 156 | ; Incorrect signature for format cases (IR vector too small)
|
77 | 157 | ; --------------------------------------------------------------------
|
|
163 | 243 | store <8 x float> %res, ptr addrspace(1) %out
|
164 | 244 | ret void
|
165 | 245 | }
|
| 246 | + |
| | +; Negative test (wmma.scale): format operand i32 2 (fp6 per the test name) is paired with an <8 x i32> %A. The expected diagnostic echoes the call, the vector operand and the format value. |
| 247 | +; CHECK: invalid vector type for format |
| 248 | +; CHECK-NEXT: %res = call <8 x float> @llvm.amdgcn.wmma.scale.f32.16x16x128.f8f6f4.v8f32.v8i32.v16i32(i32 2, <8 x i32> %A, i32 0, <16 x i32> %B, i16 0, <8 x float> %C, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false) |
| 249 | +; CHECK-NEXT: <8 x i32> %A |
| 250 | +; CHECK-NEXT: i32 2 |
| 251 | +define amdgpu_ps void @test_wmma_scale_f32_16x16x128_f8f6f4___v8i32_fp6___v16i32_fp8(<8 x i32> %A, <16 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) { |
| 252 | +bb: |
| 253 | + %res = call <8 x float> @llvm.amdgcn.wmma.scale.f32.16x16x128.f8f6f4.v8f32.v8i32.v16i32(i32 2, <8 x i32> %A, i32 0, <16 x i32> %B, i16 0, <8 x float> %C, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false) |
| 254 | + store <8 x float> %res, ptr addrspace(1) %out |
| 255 | + ret void |
| 256 | +} |
| 257 | + |
| | +; Negative test (wmma.scale): format operand i32 2 (fp6 per the test name) is paired with an <8 x i32> %B. The expected diagnostic echoes the call, the vector operand and the format value. |
| | +; The intrinsic suffix is written as .v16i32.v8i32 so that it matches the operand types (%A is <16 x i32>, %B is <8 x i32>) and the CHECK-NEXT line. |
| 258 | +; CHECK: invalid vector type for format |
| 259 | +; CHECK-NEXT: %res = call <8 x float> @llvm.amdgcn.wmma.scale.f32.16x16x128.f8f6f4.v8f32.v16i32.v8i32(i32 0, <16 x i32> %A, i32 2, <8 x i32> %B, i16 0, <8 x float> %C, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false) |
| 260 | +; CHECK-NEXT: <8 x i32> %B |
| 261 | +; CHECK-NEXT: i32 2 |
| 262 | +define amdgpu_ps void @test_wmma_scale_f32_16x16x128_f8f6f4___v16i32_fp8___v8i32_fp6(<16 x i32> %A, <8 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) { |
| 263 | +bb: |
| 264 | + %res = call <8 x float> @llvm.amdgcn.wmma.scale.f32.16x16x128.f8f6f4.v8f32.v16i32.v8i32(i32 0, <16 x i32> %A, i32 2, <8 x i32> %B, i16 0, <8 x float> %C, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false) |
| 265 | + store <8 x float> %res, ptr addrspace(1) %out |
| 266 | + ret void |
| 267 | +} |
| 268 | + |
| | +; Negative test (wmma.scale16, i64 scale operands): format operand i32 3 (bf6 per the test name) is paired with an <8 x i32> %A. The expected diagnostic echoes the call, the vector operand and the format value. |
| 269 | +; CHECK: invalid vector type for format |
| 270 | +; CHECK-NEXT: %res = call <8 x float> @llvm.amdgcn.wmma.scale16.f32.16x16x128.f8f6f4.v8f32.v8i32.v16i32(i32 3, <8 x i32> %A, i32 0, <16 x i32> %B, i16 0, <8 x float> %C, i32 0, i32 0, i64 0, i32 0, i32 0, i64 0, i1 false, i1 false) |
| 271 | +; CHECK-NEXT: <8 x i32> %A |
| 272 | +; CHECK-NEXT: i32 3 |
| 273 | +define amdgpu_ps void @test_wmma_scale16_f32_16x16x128_f8f6f4___v8i32_bf6___v16i32_fp8(<8 x i32> %A, <16 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) { |
| 274 | +bb: |
| 275 | + %res = call <8 x float> @llvm.amdgcn.wmma.scale16.f32.16x16x128.f8f6f4.v8f32.v8i32.v16i32(i32 3, <8 x i32> %A, i32 0, <16 x i32> %B, i16 0, <8 x float> %C, i32 0, i32 0, i64 0, i32 0, i32 0, i64 0, i1 false, i1 false) |
| 276 | + store <8 x float> %res, ptr addrspace(1) %out |
| 277 | + ret void |
| 278 | +} |
| 279 | + |
| | +; Negative test (wmma.scale16, i64 scale operands): format operand i32 3 (bf6 per the test name) is paired with an <8 x i32> %B. The expected diagnostic echoes the call, the vector operand and the format value. |
| | +; The intrinsic suffix is written as .v16i32.v8i32 so that it matches the operand types (%A is <16 x i32>, %B is <8 x i32>) and the CHECK-NEXT line. |
| | +; NOTE(review): the function name says "wmma_scale" although the body calls the scale16 intrinsic; consider renaming to test_wmma_scale16_... for consistency with the other scale16 tests. |
| 280 | +; CHECK: invalid vector type for format |
| 281 | +; CHECK-NEXT: %res = call <8 x float> @llvm.amdgcn.wmma.scale16.f32.16x16x128.f8f6f4.v8f32.v16i32.v8i32(i32 0, <16 x i32> %A, i32 3, <8 x i32> %B, i16 0, <8 x float> %C, i32 0, i32 0, i64 0, i32 0, i32 0, i64 0, i1 false, i1 false) |
| 282 | +; CHECK-NEXT: <8 x i32> %B |
| 283 | +; CHECK-NEXT: i32 3 |
| 284 | +define amdgpu_ps void @test_wmma_scale_f32_16x16x128_f8f6f4___v16i32_fp8___v8i32_bf6(<16 x i32> %A, <8 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) { |
| 285 | +bb: |
| 286 | + %res = call <8 x float> @llvm.amdgcn.wmma.scale16.f32.16x16x128.f8f6f4.v8f32.v16i32.v8i32(i32 0, <16 x i32> %A, i32 3, <8 x i32> %B, i16 0, <8 x float> %C, i32 0, i32 0, i64 0, i32 0, i32 0, i64 0, i1 false, i1 false) |
| 287 | + store <8 x float> %res, ptr addrspace(1) %out |
| 288 | + ret void |
| 289 | +} |
0 commit comments