Skip to content

Commit 684b1e7

Browse files
authored
Merge branch 'llvm:main' into main
2 parents fa0a7c6 + a9c417c commit 684b1e7

File tree

5 files changed

+69
-40
lines changed

5 files changed

+69
-40
lines changed

clang/lib/Headers/emmintrin.h

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3512,8 +3512,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void) {
35123512
/// destination vector of [2 x i64].
35133513
/// \returns An initialized 128-bit vector of [2 x i64] containing the values
35143514
/// provided in the operands.
3515-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64x(long long __q1,
3516-
long long __q0) {
3515+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
3516+
_mm_set_epi64x(long long __q1, long long __q0) {
35173517
return __extension__(__m128i)(__v2di){__q0, __q1};
35183518
}
35193519

@@ -3533,9 +3533,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64x(long long __q1,
35333533
/// destination vector of [2 x i64].
35343534
/// \returns An initialized 128-bit vector of [2 x i64] containing the values
35353535
/// provided in the operands.
3536-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64(__m64 __q1,
3537-
__m64 __q0) {
3538-
return _mm_set_epi64x((long long)__q1, (long long)__q0);
3536+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
3537+
_mm_set_epi64(__m64 __q1, __m64 __q0) {
3538+
return _mm_set_epi64x((long long)__q1[0], (long long)__q0[0]);
35393539
}
35403540

35413541
/// Initializes the 32-bit values in a 128-bit vector of [4 x i32] with
@@ -3560,8 +3560,10 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64(__m64 __q1,
35603560
/// vector.
35613561
/// \returns An initialized 128-bit vector of [4 x i32] containing the values
35623562
/// provided in the operands.
3563-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi32(int __i3, int __i2,
3564-
int __i1, int __i0) {
3563+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set_epi32(int __i3,
3564+
int __i2,
3565+
int __i1,
3566+
int __i0) {
35653567
return __extension__(__m128i)(__v4si){__i0, __i1, __i2, __i3};
35663568
}
35673569

@@ -3599,7 +3601,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi32(int __i3, int __i2,
35993601
/// vector.
36003602
/// \returns An initialized 128-bit vector of [8 x i16] containing the values
36013603
/// provided in the operands.
3602-
static __inline__ __m128i __DEFAULT_FN_ATTRS
3604+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
36033605
_mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3,
36043606
short __w2, short __w1, short __w0) {
36053607
return __extension__(__m128i)(__v8hi){__w0, __w1, __w2, __w3,
@@ -3648,7 +3650,7 @@ _mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3,
36483650
/// Initializes bits [7:0] of the destination vector.
36493651
/// \returns An initialized 128-bit vector of [16 x i8] containing the values
36503652
/// provided in the operands.
3651-
static __inline__ __m128i __DEFAULT_FN_ATTRS
3653+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
36523654
_mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11,
36533655
char __b10, char __b9, char __b8, char __b7, char __b6, char __b5,
36543656
char __b4, char __b3, char __b2, char __b1, char __b0) {
@@ -3670,7 +3672,8 @@ _mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11,
36703672
/// vector.
36713673
/// \returns An initialized 128-bit integer vector of [2 x i64] with both
36723674
/// elements containing the value provided in the operand.
3673-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64x(long long __q) {
3675+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
3676+
_mm_set1_epi64x(long long __q) {
36743677
return _mm_set_epi64x(__q, __q);
36753678
}
36763679

@@ -3687,7 +3690,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64x(long long __q) {
36873690
/// vector.
36883691
/// \returns An initialized 128-bit vector of [2 x i64] with all elements
36893692
/// containing the value provided in the operand.
3690-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64(__m64 __q) {
3693+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
3694+
_mm_set1_epi64(__m64 __q) {
36913695
return _mm_set_epi64(__q, __q);
36923696
}
36933697

@@ -3704,7 +3708,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64(__m64 __q) {
37043708
/// vector.
37053709
/// \returns An initialized 128-bit vector of [4 x i32] with all elements
37063710
/// containing the value provided in the operand.
3707-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi32(int __i) {
3711+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi32(int __i) {
37083712
return _mm_set_epi32(__i, __i, __i, __i);
37093713
}
37103714

@@ -3721,7 +3725,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi32(int __i) {
37213725
/// vector.
37223726
/// \returns An initialized 128-bit vector of [8 x i16] with all elements
37233727
/// containing the value provided in the operand.
3724-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi16(short __w) {
3728+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
3729+
_mm_set1_epi16(short __w) {
37253730
return _mm_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w);
37263731
}
37273732

@@ -3738,7 +3743,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi16(short __w) {
37383743
/// vector.
37393744
/// \returns An initialized 128-bit vector of [16 x i8] with all elements
37403745
/// containing the value provided in the operand.
3741-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b) {
3746+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi8(char __b) {
37423747
return _mm_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b,
37433748
__b, __b, __b, __b, __b);
37443749
}
@@ -3757,8 +3762,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b) {
37573762
/// A 64-bit integral value used to initialize the upper 64 bits of the
37583763
/// result.
37593764
/// \returns An initialized 128-bit integer vector.
3760-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi64(__m64 __q0,
3761-
__m64 __q1) {
3765+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
3766+
_mm_setr_epi64(__m64 __q0, __m64 __q1) {
37623767
return _mm_set_epi64(__q1, __q0);
37633768
}
37643769

@@ -3779,9 +3784,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi64(__m64 __q0,
37793784
/// \param __i3
37803785
/// A 32-bit integral value used to initialize bits [127:96] of the result.
37813786
/// \returns An initialized 128-bit integer vector.
3782-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi32(int __i0, int __i1,
3783-
int __i2,
3784-
int __i3) {
3787+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
3788+
_mm_setr_epi32(int __i0, int __i1, int __i2, int __i3) {
37853789
return _mm_set_epi32(__i3, __i2, __i1, __i0);
37863790
}
37873791

@@ -3810,7 +3814,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi32(int __i0, int __i1,
38103814
/// \param __w7
38113815
/// A 16-bit integral value used to initialize bits [127:112] of the result.
38123816
/// \returns An initialized 128-bit integer vector.
3813-
static __inline__ __m128i __DEFAULT_FN_ATTRS
3817+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
38143818
_mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4,
38153819
short __w5, short __w6, short __w7) {
38163820
return _mm_set_epi16(__w7, __w6, __w5, __w4, __w3, __w2, __w1, __w0);
@@ -3857,7 +3861,7 @@ _mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4,
38573861
/// \param __b15
38583862
/// An 8-bit integral value used to initialize bits [127:120] of the result.
38593863
/// \returns An initialized 128-bit integer vector.
3860-
static __inline__ __m128i __DEFAULT_FN_ATTRS
3864+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
38613865
_mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
38623866
char __b6, char __b7, char __b8, char __b9, char __b10,
38633867
char __b11, char __b12, char __b13, char __b14, char __b15) {

clang/test/CodeGen/X86/builtin_test_helpers.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,16 @@ constexpr bool match_v4si(__m128i _v, int a, int b, int c, int d) {
2525
return v[0] == a && v[1] == b && v[2] == c && v[3] == d;
2626
}
2727

28+
constexpr bool match_v8hi(__m128i _v, short a, short b, short c, short d, short e, short f, short g, short h) {
29+
__v8hi v = (__v8hi)_v;
30+
return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h;
31+
}
32+
33+
constexpr bool match_v16qi(__m128i _v, char a, char b, char c, char d, char e, char f, char g, char h, char i, char j, char k, char l, char m, char n, char o, char p) {
34+
__v16qi v = (__v16qi)_v;
35+
return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] == l && v[12] == m && v[13] == n && v[14] == o && v[15] == p;
36+
}
37+
2838
constexpr bool match_m256(__m256 v, float a, float b, float c, float d, float e, float f, float g, float h) {
2939
return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h;
3040
}

clang/test/CodeGen/X86/sse2-builtins.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1013,6 +1013,7 @@ __m128i test_mm_set_epi8(char A, char B, char C, char D,
10131013
// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
10141014
return _mm_set_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P);
10151015
}
1016+
TEST_CONSTEXPR(match_v16qi(_mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0));
10161017

10171018
__m128i test_mm_set_epi16(short A, short B, short C, short D,
10181019
short E, short F, short G, short H) {
@@ -1027,6 +1028,7 @@ __m128i test_mm_set_epi16(short A, short B, short C, short D,
10271028
// CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
10281029
return _mm_set_epi16(A, B, C, D, E, F, G, H);
10291030
}
1031+
TEST_CONSTEXPR(match_v8hi(_mm_set_epi16(0, -1, -2, -3, -4, -5, -6, -7), -7, -6, -5, -4, -3, -2, -1, 0));
10301032

10311033
__m128i test_mm_set_epi32(int A, int B, int C, int D) {
10321034
// CHECK-LABEL: test_mm_set_epi32
@@ -1036,20 +1038,23 @@ __m128i test_mm_set_epi32(int A, int B, int C, int D) {
10361038
// CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
10371039
return _mm_set_epi32(A, B, C, D);
10381040
}
1041+
TEST_CONSTEXPR(match_v4si(_mm_set_epi32(1, -3, 5, -7), -7, 5, -3, 1));
10391042

10401043
__m128i test_mm_set_epi64(__m64 A, __m64 B) {
10411044
// CHECK-LABEL: test_mm_set_epi64
10421045
// CHECK: insertelement <2 x i64> poison, i64 %{{.*}}, i32 0
10431046
// CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
10441047
return _mm_set_epi64(A, B);
10451048
}
1049+
TEST_CONSTEXPR(match_v2di(_mm_set_epi64((__m64){-1}, (__m64){42}), 42, -1));
10461050

10471051
__m128i test_mm_set_epi64x(long long A, long long B) {
10481052
// CHECK-LABEL: test_mm_set_epi64x
10491053
// CHECK: insertelement <2 x i64> poison, i64 %{{.*}}, i32 0
10501054
// CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
10511055
return _mm_set_epi64x(A, B);
10521056
}
1057+
TEST_CONSTEXPR(match_v2di(_mm_set_epi64x(100, -1000), -1000, 100));
10531058

10541059
__m128d test_mm_set_pd(double A, double B) {
10551060
// CHECK-LABEL: test_mm_set_pd
@@ -1095,6 +1100,7 @@ __m128i test_mm_set1_epi8(char A) {
10951100
// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
10961101
return _mm_set1_epi8(A);
10971102
}
1103+
TEST_CONSTEXPR(match_v16qi(_mm_set1_epi8(99), 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99));
10981104

10991105
__m128i test_mm_set1_epi16(short A) {
11001106
// CHECK-LABEL: test_mm_set1_epi16
@@ -1108,6 +1114,7 @@ __m128i test_mm_set1_epi16(short A) {
11081114
// CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
11091115
return _mm_set1_epi16(A);
11101116
}
1117+
TEST_CONSTEXPR(match_v8hi(_mm_set1_epi16(-128), -128, -128, -128, -128, -128, -128, -128, -128));
11111118

11121119
__m128i test_mm_set1_epi32(int A) {
11131120
// CHECK-LABEL: test_mm_set1_epi32
@@ -1117,20 +1124,23 @@ __m128i test_mm_set1_epi32(int A) {
11171124
// CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
11181125
return _mm_set1_epi32(A);
11191126
}
1127+
TEST_CONSTEXPR(match_v4si(_mm_set1_epi32(55), 55, 55, 55, 55));
11201128

11211129
__m128i test_mm_set1_epi64(__m64 A) {
11221130
// CHECK-LABEL: test_mm_set1_epi64
11231131
// CHECK: insertelement <2 x i64> poison, i64 %{{.*}}, i32 0
11241132
// CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
11251133
return _mm_set1_epi64(A);
11261134
}
1135+
TEST_CONSTEXPR(match_v2di(_mm_set1_epi64((__m64){-65535}), -65535, -65535));
11271136

11281137
__m128i test_mm_set1_epi64x(long long A) {
11291138
// CHECK-LABEL: test_mm_set1_epi64x
11301139
// CHECK: insertelement <2 x i64> poison, i64 %{{.*}}, i32 0
11311140
// CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
11321141
return _mm_set1_epi64x(A);
11331142
}
1143+
TEST_CONSTEXPR(match_v2di(_mm_set1_epi64x(65536), 65536, 65536));
11341144

11351145
__m128d test_mm_set1_pd(double A) {
11361146
// CHECK-LABEL: test_mm_set1_pd
@@ -1163,6 +1173,7 @@ __m128i test_mm_setr_epi8(char A, char B, char C, char D,
11631173
// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
11641174
return _mm_setr_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P);
11651175
}
1176+
TEST_CONSTEXPR(match_v16qi(_mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15));
11661177

11671178
__m128i test_mm_setr_epi16(short A, short B, short C, short D,
11681179
short E, short F, short G, short H) {
@@ -1177,6 +1188,7 @@ __m128i test_mm_setr_epi16(short A, short B, short C, short D,
11771188
// CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
11781189
return _mm_setr_epi16(A, B, C, D, E, F, G, H);
11791190
}
1191+
TEST_CONSTEXPR(match_v8hi(_mm_setr_epi16(0, -1, -2, -3, -4, -5, -6, -7), 0, -1, -2, -3, -4, -5, -6, -7));
11801192

11811193
__m128i test_mm_setr_epi32(int A, int B, int C, int D) {
11821194
// CHECK-LABEL: test_mm_setr_epi32
@@ -1186,13 +1198,15 @@ __m128i test_mm_setr_epi32(int A, int B, int C, int D) {
11861198
// CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
11871199
return _mm_setr_epi32(A, B, C, D);
11881200
}
1201+
TEST_CONSTEXPR(match_v4si(_mm_setr_epi32(1, -3, 5, -7), 1, -3, 5, -7));
11891202

11901203
__m128i test_mm_setr_epi64(__m64 A, __m64 B) {
11911204
// CHECK-LABEL: test_mm_setr_epi64
11921205
// CHECK: insertelement <2 x i64> poison, i64 %{{.*}}, i32 0
11931206
// CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
11941207
return _mm_setr_epi64(A, B);
11951208
}
1209+
TEST_CONSTEXPR(match_v2di(_mm_setr_epi64((__m64){-1}, (__m64){42}), -1, 42));
11961210

11971211
__m128d test_mm_setr_pd(double A, double B) {
11981212
// CHECK-LABEL: test_mm_setr_pd

mlir/include/mlir/Dialect/SCF/TransformOps/SCFTransformOps.td

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ def LoopPeelOp : Op<Transform_Dialect, "loop.peel",
146146
let summary = "Peels the first or last iteration of the loop";
147147
let description = [{
148148
Rewrite the given loop with a main loop and a partial (first or last) loop.
149-
When the `peelFront` option is set as true, the first iteration is peeled off.
149+
When the `peelFront` option is set to true, the first iteration is peeled off.
150150
Otherwise, updates the given loop so that its step evenly divides its range and puts
151151
the remaining iterations into a separate loop or a conditional.
152152

@@ -156,18 +156,20 @@ def LoopPeelOp : Op<Transform_Dialect, "loop.peel",
156156
#### Return modes
157157

158158
This operation ignores non-scf::ForOp ops and drops them in the return.
159-
160-
When `peelFront` is true, this operation returns two scf::ForOp Ops, the
161-
first scf::ForOp corresponds to the first iteration of the loop which can
162-
be canonicalized away in the following optimization. The second loop Op
163-
contains the remaining iteration, and the new lower bound is the original
164-
lower bound plus the number of steps.
165-
166-
When `peelFront` is not true, this operation returns two scf::ForOp Ops, with the first
167-
scf::ForOp satisfying: "the loop trip count is divisible by the step".
168-
The second loop Op contains the remaining iteration. Note that even though the
169-
Payload IR modification may be performed in-place, this operation consumes
170-
the operand handle and produces a new one.
159+
The op returns two loops: the peeled loop, which has a trip count divisible
160+
by the step, and the remainder loop.
161+
162+
When `peelFront` is true, the first result (remainder loop) executes all
163+
but the first iteration of the target loop. The second result (peeled
164+
loop) corresponds to the first iteration of the loop, which can be
165+
canonicalized away in the following optimizations.
166+
167+
When `peelFront` is false, the first result (peeled loop) is the portion
168+
of the target loop with the highest upper bound that is divisible by the
169+
step. The second result (remainder loop) contains the remaining iterations.
170+
171+
Note that even though the Payload IR modification may be performed
172+
in-place, this operation consumes the operand handle and produces a new one.
171173

172174
#### Return Modes
173175

mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -206,12 +206,11 @@ LogicalResult mlir::scf::peelForLoopAndSimplifyBounds(RewriterBase &rewriter,
206206
return success();
207207
}
208208

209-
/// When the `peelFront` option is set as true, the first iteration of the loop
210-
/// is peeled off. This function rewrites the original scf::ForOp as two
211-
/// scf::ForOp Ops, the first scf::ForOp corresponds to the first iteration of
212-
/// the loop which can be canonicalized away in the following optimization. The
213-
/// second loop Op contains the remaining iteration, and the new lower bound is
214-
/// the original lower bound plus the number of steps.
209+
/// Rewrites the original scf::ForOp as two scf::ForOp Ops. The first
210+
/// scf::ForOp corresponds to the first iteration of the loop, which can be
211+
/// canonicalized away in the following optimizations. The second loop Op
212+
/// contains the remaining iterations, with its lower bound updated to the
213+
/// original lower bound plus the step (i.e. it skips the first iteration).
215214
LogicalResult mlir::scf::peelForLoopFirstIteration(RewriterBase &b, ForOp forOp,
216215
ForOp &firstIteration) {
217216
RewriterBase::InsertionGuard guard(b);

0 commit comments

Comments
 (0)