Skip to content

Commit 8bf0193

Browse files
authored
Merge branch 'main' into dev/jholewinski/bitstream-remarks-msvc-fix
2 parents 781bab3 + 8fd558d commit 8bf0193

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

59 files changed

+876
-902
lines changed

.ci/premerge_advisor_upload.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,11 @@ def main(commit_sha, workflow_run_number, build_log_files):
3232
"platform": current_platform,
3333
}
3434
if test_failures:
35-
for name, failure_message in test_failures:
36-
failure_info["failures"].append({"name": name, "message": failure_message})
35+
for _, failures in test_failures.items():
36+
for name, failure_message in failures:
37+
failure_info["failures"].append(
38+
{"name": name, "message": failure_message}
39+
)
3740
else:
3841
ninja_failures = generate_test_report_lib.find_failure_in_ninja_logs(ninja_logs)
3942
for name, failure_message in ninja_failures:

llvm/docs/LangRef.rst

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21062,12 +21062,15 @@ integer element type.
2106221062

2106321063
Syntax:
2106421064
"""""""
21065-
This is an overloaded intrinsic.
21065+
This is an overloaded intrinsic. You can use ``llvm.matrix.column.major.load``
21066+
to load any vector type with a stride of any bitwidth up to 64.
2106621067

2106721068
::
2106821069

21069-
declare vectorty @llvm.matrix.column.major.load.*(
21070+
declare <4 x i32> @llvm.matrix.column.major.load.v4i32.i64(
2107021071
ptrty %Ptr, i64 %Stride, i1 <IsVolatile>, i32 <Rows>, i32 <Cols>)
21072+
declare <9 x double> @llvm.matrix.column.major.load.v9f64.i32(
21073+
ptrty %Ptr, i32 %Stride, i1 <IsVolatile>, i32 <Rows>, i32 <Cols>)
2107121074

2107221075
Overview:
2107321076
"""""""""
@@ -21086,9 +21089,9 @@ Arguments:
2108621089

2108721090
The first argument ``%Ptr`` is a pointer type to the returned vector type, and
2108821091
corresponds to the start address to load from. The second argument ``%Stride``
21089-
is a positive, constant integer with ``%Stride >= <Rows>``. ``%Stride`` is used
21090-
to compute the column memory addresses. I.e., for a column ``C``, its start
21091-
memory addresses is calculated with ``%Ptr + C * %Stride``. The third Argument
21092+
is a positive integer for which ``%Stride >= <Rows>``. ``%Stride`` is used to
21093+
compute the column memory addresses. I.e., for a column ``C``, its start memory
21094+
address is calculated with ``%Ptr + C * %Stride``. The third argument
2109221095
``<IsVolatile>`` is a boolean value. The fourth and fifth arguments,
2109321096
``<Rows>`` and ``<Cols>``, correspond to the number of rows and columns,
2109421097
respectively, and must be positive, constant integers. The returned vector must
@@ -21103,11 +21106,17 @@ The :ref:`align <attr_align>` parameter attribute can be provided for the
2110321106

2110421107
Syntax:
2110521108
"""""""
21109+
This is an overloaded intrinsic. You can use ``llvm.matrix.column.major.store`` to store
21110+
any vector type with a stride of any bitwidth up to 64.
2110621111

2110721112
::
2110821113

21109-
declare void @llvm.matrix.column.major.store.*(
21110-
vectorty %In, ptrty %Ptr, i64 %Stride, i1 <IsVolatile>, i32 <Rows>, i32 <Cols>)
21114+
declare void @llvm.matrix.column.major.store.v4i32.i64(
21115+
<4 x i32> %In, ptrty %Ptr, i64 %Stride, i1 <IsVolatile>, i32 <Rows>,
21116+
i32 <Cols>)
21117+
declare void @llvm.matrix.column.major.store.v9f64.i32(
21118+
<9 x double> %In, ptrty %Ptr, i32 %Stride, i1 <IsVolatile>, i32
21119+
<Rows>, i32 <Cols>)
2111121120

2111221121
Overview:
2111321122
"""""""""
@@ -21127,7 +21136,7 @@ Arguments:
2112721136
The first argument ``%In`` is a vector that corresponds to a ``<Rows> x
2112821137
<Cols>`` matrix to be stored to memory. The second argument ``%Ptr`` is a
2112921138
pointer to the vector type of ``%In``, and is the start address of the matrix
21130-
in memory. The third argument ``%Stride`` is a positive, constant integer with
21139+
in memory. The third argument ``%Stride`` is a positive integer for which
2113121140
``%Stride >= <Rows>``. ``%Stride`` is used to compute the column memory
2113221141
addresses. I.e., for a column ``C``, its start memory address is calculated
2113321142
with ``%Ptr + C * %Stride``. The fourth argument ``<IsVolatile>`` is a boolean

llvm/lib/IR/Verifier.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6479,9 +6479,12 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
64796479
NumRows->getZExtValue() * NumColumns->getZExtValue(),
64806480
"Result of a matrix operation does not fit in the returned vector!");
64816481

6482-
if (Stride)
6482+
if (Stride) {
6483+
Check(Stride->getBitWidth() <= 64, "Stride bitwidth cannot exceed 64!",
6484+
IF);
64836485
Check(Stride->getZExtValue() >= NumRows->getZExtValue(),
64846486
"Stride must be greater or equal than the number of rows!", IF);
6487+
}
64856488

64866489
break;
64876490
}

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 8 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -29755,65 +29755,30 @@ static SDValue LowervXi8MulWithUNPCK(SDValue A, SDValue B, const SDLoc &dl,
2975529755
const X86Subtarget &Subtarget,
2975629756
SelectionDAG &DAG,
2975729757
SDValue *Low = nullptr) {
29758-
unsigned NumElts = VT.getVectorNumElements();
29759-
2976029758
// For vXi8 we will unpack the low and high half of each 128 bit lane to widen
2976129759
// to a vXi16 type. Do the multiplies, shift the results and pack the half
2976229760
// lane results back together.
2976329761

2976429762
// We'll take different approaches for signed and unsigned.
29765-
// For unsigned we'll use punpcklbw/punpckhbw to put zero extend the bytes
29766-
// and use pmullw to calculate the full 16-bit product.
29763+
// For unsigned we'll use punpcklbw/punpckhbw to zero extend the bytes to
29764+
// words and use pmullw to calculate the full 16-bit product.
2976729765
// For signed we'll use punpcklbw/punpckbw to extend the bytes to words and
2976829766
// shift them left into the upper byte of each word. This allows us to use
2976929767
// pmulhw to calculate the full 16-bit product. This trick means we don't
2977029768
// need to sign extend the bytes to use pmullw.
29771-
29772-
MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
29769+
MVT ExVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
2977329770
SDValue Zero = DAG.getConstant(0, dl, VT);
2977429771

29775-
SDValue ALo, AHi;
29772+
SDValue ALo, AHi, BLo, BHi;
2977629773
if (IsSigned) {
2977729774
ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, Zero, A));
29778-
AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, Zero, A));
29779-
} else {
29780-
ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A, Zero));
29781-
AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A, Zero));
29782-
}
29783-
29784-
SDValue BLo, BHi;
29785-
if (ISD::isBuildVectorOfConstantSDNodes(B.getNode())) {
29786-
// If the RHS is a constant, manually unpackl/unpackh and extend.
29787-
SmallVector<SDValue, 16> LoOps, HiOps;
29788-
for (unsigned i = 0; i != NumElts; i += 16) {
29789-
for (unsigned j = 0; j != 8; ++j) {
29790-
SDValue LoOp = B.getOperand(i + j);
29791-
SDValue HiOp = B.getOperand(i + j + 8);
29792-
29793-
if (IsSigned) {
29794-
LoOp = DAG.getAnyExtOrTrunc(LoOp, dl, MVT::i16);
29795-
HiOp = DAG.getAnyExtOrTrunc(HiOp, dl, MVT::i16);
29796-
LoOp = DAG.getNode(ISD::SHL, dl, MVT::i16, LoOp,
29797-
DAG.getConstant(8, dl, MVT::i16));
29798-
HiOp = DAG.getNode(ISD::SHL, dl, MVT::i16, HiOp,
29799-
DAG.getConstant(8, dl, MVT::i16));
29800-
} else {
29801-
LoOp = DAG.getZExtOrTrunc(LoOp, dl, MVT::i16);
29802-
HiOp = DAG.getZExtOrTrunc(HiOp, dl, MVT::i16);
29803-
}
29804-
29805-
LoOps.push_back(LoOp);
29806-
HiOps.push_back(HiOp);
29807-
}
29808-
}
29809-
29810-
BLo = DAG.getBuildVector(ExVT, dl, LoOps);
29811-
BHi = DAG.getBuildVector(ExVT, dl, HiOps);
29812-
} else if (IsSigned) {
2981329775
BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, Zero, B));
29776+
AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, Zero, A));
2981429777
BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, Zero, B));
2981529778
} else {
29779+
ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A, Zero));
2981629780
BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, B, Zero));
29781+
AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A, Zero));
2981729782
BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, B, Zero));
2981829783
}
2981929784

@@ -29826,7 +29791,7 @@ static SDValue LowervXi8MulWithUNPCK(SDValue A, SDValue B, const SDLoc &dl,
2982629791
if (Low)
2982729792
*Low = getPack(DAG, Subtarget, dl, VT, RLo, RHi);
2982829793

29829-
return getPack(DAG, Subtarget, dl, VT, RLo, RHi, /*PackHiHalf*/ true);
29794+
return getPack(DAG, Subtarget, dl, VT, RLo, RHi, /*PackHiHalf=*/true);
2983029795
}
2983129796

2983229797
static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,

llvm/lib/Target/X86/X86MCInstLower.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1959,6 +1959,7 @@ static void addConstantComments(const MachineInstr *MI,
19591959
}
19601960

19611961
CASE_ARITH_RM(PMADDWD)
1962+
CASE_ARITH_RM(PMULLD)
19621963
CASE_ARITH_RM(PMULLW)
19631964
CASE_ARITH_RM(PMULHW)
19641965
CASE_ARITH_RM(PMULHUW)

llvm/test/CodeGen/X86/avx-shift.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ define <8 x i32> @vshift08_add(<8 x i32> %a, <8 x i32> %y) {
201201
define <4 x i32> @vshift13(<4 x i32> %in) {
202202
; CHECK-LABEL: vshift13:
203203
; CHECK: # %bb.0:
204-
; CHECK-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
204+
; CHECK-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,16]
205205
; CHECK-NEXT: retq
206206
%T = shl <4 x i32> %in, <i32 0, i32 1, i32 2, i32 4>
207207
ret <4 x i32> %T

llvm/test/CodeGen/X86/avx2-arith.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,12 +199,12 @@ define <8 x i32> @mul_const5(<8 x i32> %x) {
199199
define <8 x i32> @mul_const6(<8 x i32> %x) {
200200
; X86-LABEL: mul_const6:
201201
; X86: # %bb.0:
202-
; X86-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
202+
; X86-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # [0,0,0,2,0,2,0,0]
203203
; X86-NEXT: retl
204204
;
205205
; X64-LABEL: mul_const6:
206206
; X64: # %bb.0:
207-
; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
207+
; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [0,0,0,2,0,2,0,0]
208208
; X64-NEXT: retq
209209
%y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 2, i32 0, i32 2, i32 0, i32 0>
210210
ret <8 x i32> %y

llvm/test/CodeGen/X86/combine-mul.ll

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ define <4 x i32> @combine_vec_mul_pow2a(<4 x i32> %x) {
6666
define <4 x i32> @combine_vec_mul_pow2b(<4 x i32> %x) {
6767
; SSE-LABEL: combine_vec_mul_pow2b:
6868
; SSE: # %bb.0:
69-
; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
69+
; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2,4,16]
7070
; SSE-NEXT: retq
7171
;
7272
; AVX-LABEL: combine_vec_mul_pow2b:
@@ -120,12 +120,12 @@ define <4 x i32> @combine_vec_mul_negpow2a(<4 x i32> %x) {
120120
define <4 x i32> @combine_vec_mul_negpow2b(<4 x i32> %x) {
121121
; SSE-LABEL: combine_vec_mul_negpow2b:
122122
; SSE: # %bb.0:
123-
; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
123+
; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [4294967295,4294967294,4294967292,4294967280]
124124
; SSE-NEXT: retq
125125
;
126126
; AVX-LABEL: combine_vec_mul_negpow2b:
127127
; AVX: # %bb.0:
128-
; AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
128+
; AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [4294967295,4294967294,4294967292,4294967280]
129129
; AVX-NEXT: retq
130130
%1 = mul <4 x i32> %x, <i32 -1, i32 -2, i32 -4, i32 -16>
131131
ret <4 x i32> %1
@@ -176,12 +176,12 @@ define <4 x i64> @combine_vec_mul_negpow2c(<4 x i64> %x) {
176176
define <4 x i32> @combine_vec_mul_shl_const(<4 x i32> %x) {
177177
; SSE-LABEL: combine_vec_mul_shl_const:
178178
; SSE: # %bb.0:
179-
; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
179+
; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,12,1280,458752]
180180
; SSE-NEXT: retq
181181
;
182182
; AVX-LABEL: combine_vec_mul_shl_const:
183183
; AVX: # %bb.0:
184-
; AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
184+
; AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2,12,1280,458752]
185185
; AVX-NEXT: retq
186186
%1 = shl <4 x i32> %x, <i32 1, i32 2, i32 8, i32 16>
187187
%2 = mul <4 x i32> %1, <i32 1, i32 3, i32 5, i32 7>
@@ -193,7 +193,7 @@ define <4 x i32> @combine_vec_mul_shl_oneuse0(<4 x i32> %x, <4 x i32> %y) {
193193
; SSE-LABEL: combine_vec_mul_shl_oneuse0:
194194
; SSE: # %bb.0:
195195
; SSE-NEXT: pmulld %xmm1, %xmm0
196-
; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
196+
; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,4,256,65536]
197197
; SSE-NEXT: retq
198198
;
199199
; AVX-LABEL: combine_vec_mul_shl_oneuse0:
@@ -210,7 +210,7 @@ define <4 x i32> @combine_vec_mul_shl_oneuse1(<4 x i32> %x, <4 x i32> %y) {
210210
; SSE-LABEL: combine_vec_mul_shl_oneuse1:
211211
; SSE: # %bb.0:
212212
; SSE-NEXT: pmulld %xmm1, %xmm0
213-
; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
213+
; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,4,256,65536]
214214
; SSE-NEXT: retq
215215
;
216216
; AVX-LABEL: combine_vec_mul_shl_oneuse1:
@@ -226,7 +226,7 @@ define <4 x i32> @combine_vec_mul_shl_oneuse1(<4 x i32> %x, <4 x i32> %y) {
226226
define <4 x i32> @combine_vec_mul_shl_multiuse0(<4 x i32> %x, <4 x i32> %y) {
227227
; SSE-LABEL: combine_vec_mul_shl_multiuse0:
228228
; SSE: # %bb.0:
229-
; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
229+
; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,4,256,65536]
230230
; SSE-NEXT: pmulld %xmm0, %xmm1
231231
; SSE-NEXT: paddd %xmm1, %xmm0
232232
; SSE-NEXT: retq
@@ -246,7 +246,7 @@ define <4 x i32> @combine_vec_mul_shl_multiuse0(<4 x i32> %x, <4 x i32> %y) {
246246
define <4 x i32> @combine_vec_mul_shl_multiuse1(<4 x i32> %x, <4 x i32> %y) {
247247
; SSE-LABEL: combine_vec_mul_shl_multiuse1:
248248
; SSE: # %bb.0:
249-
; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
249+
; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,4,256,65536]
250250
; SSE-NEXT: pmulld %xmm0, %xmm1
251251
; SSE-NEXT: paddd %xmm1, %xmm0
252252
; SSE-NEXT: retq
@@ -268,13 +268,13 @@ define <4 x i32> @combine_vec_mul_shl_multiuse1(<4 x i32> %x, <4 x i32> %y) {
268268
define <4 x i32> @combine_vec_mul_add(<4 x i32> %x) {
269269
; SSE-LABEL: combine_vec_mul_add:
270270
; SSE: # %bb.0:
271-
; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
271+
; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [4,6,2,0]
272272
; SSE-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
273273
; SSE-NEXT: retq
274274
;
275275
; AVX-LABEL: combine_vec_mul_add:
276276
; AVX: # %bb.0:
277-
; AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
277+
; AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [4,6,2,0]
278278
; AVX-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
279279
; AVX-NEXT: retq
280280
%1 = add <4 x i32> %x, <i32 1, i32 2, i32 8, i32 16>

llvm/test/CodeGen/X86/combine-sdiv.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2927,7 +2927,7 @@ define <16 x i8> @pr38658(<16 x i8> %x) {
29272927
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
29282928
; SSE2-NEXT: pxor %xmm3, %xmm3
29292929
; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
2930-
; SSE2-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [0,0,0,0,0,0,0,37632]
2930+
; SSE2-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,147]
29312931
; SSE2-NEXT: psrlw $8, %xmm3
29322932
; SSE2-NEXT: packuswb %xmm3, %xmm1
29332933
; SSE2-NEXT: paddb %xmm1, %xmm0
@@ -2947,7 +2947,7 @@ define <16 x i8> @pr38658(<16 x i8> %x) {
29472947
; SSE41-NEXT: pxor %xmm1, %xmm1
29482948
; SSE41-NEXT: pxor %xmm2, %xmm2
29492949
; SSE41-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
2950-
; SSE41-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [0,0,0,0,0,0,0,37632]
2950+
; SSE41-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,147]
29512951
; SSE41-NEXT: psrlw $8, %xmm2
29522952
; SSE41-NEXT: packuswb %xmm2, %xmm1
29532953
; SSE41-NEXT: paddb %xmm0, %xmm1
@@ -2971,7 +2971,7 @@ define <16 x i8> @pr38658(<16 x i8> %x) {
29712971
; AVX1: # %bb.0:
29722972
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
29732973
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
2974-
; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [0,0,0,0,0,0,0,37632]
2974+
; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,147]
29752975
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
29762976
; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
29772977
; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm1
@@ -3044,7 +3044,7 @@ define <16 x i8> @pr38658(<16 x i8> %x) {
30443044
; XOP: # %bb.0:
30453045
; XOP-NEXT: vpxor %xmm1, %xmm1, %xmm1
30463046
; XOP-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
3047-
; XOP-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [0,0,0,0,0,0,0,37632]
3047+
; XOP-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,147]
30483048
; XOP-NEXT: vpperm {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15],xmm2[1,3,5,7,9,11,13,15]
30493049
; XOP-NEXT: vpaddb %xmm0, %xmm1, %xmm0
30503050
; XOP-NEXT: vpshab {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1

0 commit comments

Comments
 (0)