|
| 1 | +// RUN: %clang_cc1 -fenable-matrix %s -emit-llvm -triple x86_64-unknown-linux -disable-llvm-passes -o - -std=c++11 | FileCheck %s |
| 2 | + |
// Types under test: _BitInt elements of width 8, 32 and 512 bits, each wrapped
// once as a 3-element ext_vector_type and once as a 3x3 matrix_type.
| 3 | +using i8x3 = _BitInt(8) __attribute__((ext_vector_type(3))); |
| 4 | +using i8x3x3 = _BitInt(8) __attribute__((matrix_type(3, 3))); |
| 5 | +using i32x3 = _BitInt(32) __attribute__((ext_vector_type(3))); |
| 6 | +using i32x3x3 = _BitInt(32) __attribute__((matrix_type(3, 3))); |
| 7 | +using i512x3 = _BitInt(512) __attribute__((ext_vector_type(3))); |
| 8 | +using i512x3x3 = _BitInt(512) __attribute__((matrix_type(3, 3))); |
| 9 | + |
// v1: adds a 3-element _BitInt(8) vector to itself.
// The FileCheck directives below expect the argument and the return value to be
// coerced to i32, and the parameter to be stored and reloaded as a padded
// <4 x i8> that is narrowed back to <3 x i8> with shufflevector before the add.
// The body must stay exactly `return a + a;` because the expected IR is matched
// line by line.
| 10 | +// CHECK-LABEL: define dso_local i32 @_Z2v1Dv3_DB8_(i32 %a.coerce) |
| 11 | +i8x3 v1(i8x3 a) { |
| 12 | + // CHECK-NEXT: entry: |
| 13 | + // CHECK-NEXT: %retval = alloca <3 x i8>, align 4 |
| 14 | + // CHECK-NEXT: %a = alloca <3 x i8>, align 4 |
| 15 | + // CHECK-NEXT: %a.addr = alloca <3 x i8>, align 4 |
| 16 | + // CHECK-NEXT: store i32 %a.coerce, ptr %a, align 4 |
| 17 | + // CHECK-NEXT: %loadVec4 = load <4 x i8>, ptr %a, align 4 |
| 18 | + // CHECK-NEXT: %a1 = shufflevector <4 x i8> %loadVec4, <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2> |
| 19 | + // CHECK-NEXT: %extractVec = shufflevector <3 x i8> %a1, <3 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> |
| 20 | + // CHECK-NEXT: store <4 x i8> %extractVec, ptr %a.addr, align 4 |
| 21 | + // CHECK-NEXT: %loadVec42 = load <4 x i8>, ptr %a.addr, align 4 |
| 22 | + // CHECK-NEXT: %extractVec3 = shufflevector <4 x i8> %loadVec42, <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2> |
| 23 | + // CHECK-NEXT: %loadVec44 = load <4 x i8>, ptr %a.addr, align 4 |
| 24 | + // CHECK-NEXT: %extractVec5 = shufflevector <4 x i8> %loadVec44, <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2> |
| 25 | + // CHECK-NEXT: %add = add <3 x i8> %extractVec3, %extractVec5 |
| 26 | + // CHECK-NEXT: store <3 x i8> %add, ptr %retval, align 4 |
| 27 | + // CHECK-NEXT: %0 = load i32, ptr %retval, align 4 |
| 28 | + // CHECK-NEXT: ret i32 %0 |
| 29 | + return a + a; |
| 30 | +} |
| 31 | + |
// v2: adds a 3-element _BitInt(32) vector to itself.
// The FileCheck directives below expect the vector to be passed and returned
// directly as a noundef <3 x i32>, with the local copy stored and reloaded as a
// padded <4 x i32> at 16-byte alignment and narrowed back to <3 x i32> with
// shufflevector before the add.
| 32 | +// CHECK-LABEL: define dso_local noundef <3 x i32> @_Z2v2Dv3_DB32_(<3 x i32> noundef %a) |
| 33 | +i32x3 v2(i32x3 a) { |
| 34 | + // CHECK-NEXT: entry: |
| 35 | + // CHECK-NEXT: %a.addr = alloca <3 x i32>, align 16 |
| 36 | + // CHECK-NEXT: %extractVec = shufflevector <3 x i32> %a, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> |
| 37 | + // CHECK-NEXT: store <4 x i32> %extractVec, ptr %a.addr, align 16 |
| 38 | + // CHECK-NEXT: %loadVec4 = load <4 x i32>, ptr %a.addr, align 16 |
| 39 | + // CHECK-NEXT: %extractVec1 = shufflevector <4 x i32> %loadVec4, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2> |
| 40 | + // CHECK-NEXT: %loadVec42 = load <4 x i32>, ptr %a.addr, align 16 |
| 41 | + // CHECK-NEXT: %extractVec3 = shufflevector <4 x i32> %loadVec42, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2> |
| 42 | + // CHECK-NEXT: %add = add <3 x i32> %extractVec1, %extractVec3 |
| 43 | + // CHECK-NEXT: ret <3 x i32> %add |
| 44 | + return a + a; |
| 45 | +} |
| 46 | + |
// v3: adds a 3-element _BitInt(512) vector to itself.
// The FileCheck directives below expect the argument to arrive through a byval
// pointer aligned to 256, be loaded as a padded <4 x i512> and narrowed to
// <3 x i512> with shufflevector, while the result is returned directly as
// <3 x i512>.
| 47 | +// CHECK-LABEL: define dso_local noundef <3 x i512> @_Z2v3Dv3_DB512_(ptr noundef byval(<3 x i512>) align 256 %0) |
| 48 | +i512x3 v3(i512x3 a) { |
| 49 | + // CHECK-NEXT: entry: |
| 50 | + // CHECK-NEXT: %a.addr = alloca <3 x i512>, align 256 |
| 51 | + // CHECK-NEXT: %loadVec4 = load <4 x i512>, ptr %0, align 256 |
| 52 | + // CHECK-NEXT: %a = shufflevector <4 x i512> %loadVec4, <4 x i512> poison, <3 x i32> <i32 0, i32 1, i32 2> |
| 53 | + // CHECK-NEXT: %extractVec = shufflevector <3 x i512> %a, <3 x i512> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> |
| 54 | + // CHECK-NEXT: store <4 x i512> %extractVec, ptr %a.addr, align 256 |
| 55 | + // CHECK-NEXT: %loadVec41 = load <4 x i512>, ptr %a.addr, align 256 |
| 56 | + // CHECK-NEXT: %extractVec2 = shufflevector <4 x i512> %loadVec41, <4 x i512> poison, <3 x i32> <i32 0, i32 1, i32 2> |
| 57 | + // CHECK-NEXT: %loadVec43 = load <4 x i512>, ptr %a.addr, align 256 |
| 58 | + // CHECK-NEXT: %extractVec4 = shufflevector <4 x i512> %loadVec43, <4 x i512> poison, <3 x i32> <i32 0, i32 1, i32 2> |
| 59 | + // CHECK-NEXT: %add = add <3 x i512> %extractVec2, %extractVec4 |
| 60 | + // CHECK-NEXT: ret <3 x i512> %add |
| 61 | + return a + a; |
| 62 | +} |
| 63 | + |
// m1: adds a 3x3 _BitInt(8) matrix to itself.
// The FileCheck directives below expect the matrix to be passed and returned as
// a flat noundef <9 x i8>, kept in a [9 x i8] alloca with alignment 1, and
// summed with a single add on <9 x i8>.
| 64 | +// CHECK-LABEL: define dso_local noundef <9 x i8> @_Z2m1u11matrix_typeILm3ELm3EDB8_E(<9 x i8> noundef %a) |
| 65 | +i8x3x3 m1(i8x3x3 a) { |
| 66 | + // CHECK-NEXT: entry: |
| 67 | + // CHECK-NEXT: %a.addr = alloca [9 x i8], align 1 |
| 68 | + // CHECK-NEXT: store <9 x i8> %a, ptr %a.addr, align 1 |
| 69 | + // CHECK-NEXT: %0 = load <9 x i8>, ptr %a.addr, align 1 |
| 70 | + // CHECK-NEXT: %1 = load <9 x i8>, ptr %a.addr, align 1 |
| 71 | + // CHECK-NEXT: %2 = add <9 x i8> %0, %1 |
| 72 | + // CHECK-NEXT: ret <9 x i8> %2 |
| 73 | + return a + a; |
| 74 | +} |
| 75 | + |
// m2: adds a 3x3 _BitInt(32) matrix to itself.
// The FileCheck directives below expect the matrix to be passed and returned as
// a flat noundef <9 x i32>, kept in a [9 x i32] alloca with alignment 4, and
// summed with a single add on <9 x i32>.
| 76 | +// CHECK-LABEL: define dso_local noundef <9 x i32> @_Z2m2u11matrix_typeILm3ELm3EDB32_E(<9 x i32> noundef %a) |
| 77 | +i32x3x3 m2(i32x3x3 a) { |
| 78 | + // CHECK-NEXT: entry: |
| 79 | + // CHECK-NEXT: %a.addr = alloca [9 x i32], align 4 |
| 80 | + // CHECK-NEXT: store <9 x i32> %a, ptr %a.addr, align 4 |
| 81 | + // CHECK-NEXT: %0 = load <9 x i32>, ptr %a.addr, align 4 |
| 82 | + // CHECK-NEXT: %1 = load <9 x i32>, ptr %a.addr, align 4 |
| 83 | + // CHECK-NEXT: %2 = add <9 x i32> %0, %1 |
| 84 | + // CHECK-NEXT: ret <9 x i32> %2 |
| 85 | + return a + a; |
| 86 | +} |
| 87 | + |
// m3: adds a 3x3 _BitInt(512) matrix to itself.
// The FileCheck directives below expect the matrix to be passed and returned as
// a flat noundef <9 x i512>, kept in a [9 x i512] alloca with alignment 8, and
// summed with a single add on <9 x i512>.
| 88 | +// CHECK-LABEL: define dso_local noundef <9 x i512> @_Z2m3u11matrix_typeILm3ELm3EDB512_E(<9 x i512> noundef %a) |
| 89 | +i512x3x3 m3(i512x3x3 a) { |
| 90 | + // CHECK-NEXT: entry: |
| 91 | + // CHECK-NEXT: %a.addr = alloca [9 x i512], align 8 |
| 92 | + // CHECK-NEXT: store <9 x i512> %a, ptr %a.addr, align 8 |
| 93 | + // CHECK-NEXT: %0 = load <9 x i512>, ptr %a.addr, align 8 |
| 94 | + // CHECK-NEXT: %1 = load <9 x i512>, ptr %a.addr, align 8 |
| 95 | + // CHECK-NEXT: %2 = add <9 x i512> %0, %1 |
| 96 | + // CHECK-NEXT: ret <9 x i512> %2 |
| 97 | + return a + a; |
| 98 | +} |
0 commit comments