Skip to content

Commit 6f3f108

Browse files
authored
[X86] LowerMUL - remove vXi8 UNPCK(BUILD_VECTOR,UNDEF) special case handling (#168277)
getUnpackl/getUnpackh + shuffle combining can now handle this for us generically.
1 parent 8a055f8 commit 6f3f108

File tree

9 files changed

+39
-57
lines changed

9 files changed

+39
-57
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 2 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -29647,26 +29647,8 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
2964729647
SDValue Undef = DAG.getUNDEF(VT);
2964829648
SDValue ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A, Undef));
2964929649
SDValue AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A, Undef));
29650-
29651-
SDValue BLo, BHi;
29652-
if (ISD::isBuildVectorOfConstantSDNodes(B.getNode())) {
29653-
// If the RHS is a constant, manually unpackl/unpackh.
29654-
SmallVector<SDValue, 16> LoOps, HiOps;
29655-
for (unsigned i = 0; i != NumElts; i += 16) {
29656-
for (unsigned j = 0; j != 8; ++j) {
29657-
LoOps.push_back(DAG.getAnyExtOrTrunc(B.getOperand(i + j), dl,
29658-
MVT::i16));
29659-
HiOps.push_back(DAG.getAnyExtOrTrunc(B.getOperand(i + j + 8), dl,
29660-
MVT::i16));
29661-
}
29662-
}
29663-
29664-
BLo = DAG.getBuildVector(ExVT, dl, LoOps);
29665-
BHi = DAG.getBuildVector(ExVT, dl, HiOps);
29666-
} else {
29667-
BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, B, Undef));
29668-
BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, B, Undef));
29669-
}
29650+
SDValue BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, B, Undef));
29651+
SDValue BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, B, Undef));
2967029652

2967129653
// Multiply, mask the lower 8bits of the lo/hi results and pack.
2967229654
SDValue RLo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo);

llvm/test/CodeGen/X86/pmul.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ define <16 x i8> @mul_v16i8c(<16 x i8> %i) nounwind {
1010
; SSE2: # %bb.0: # %entry
1111
; SSE2-NEXT: movdqa %xmm0, %xmm1
1212
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
13-
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [117,117,117,117,117,117,117,117]
13+
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [117,u,117,u,117,u,117,u,117,u,117,u,117,u,117,u]
1414
; SSE2-NEXT: pmullw %xmm2, %xmm1
1515
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
1616
; SSE2-NEXT: pand %xmm3, %xmm1
@@ -378,7 +378,7 @@ define <32 x i8> @mul_v32i8c(<32 x i8> %i) nounwind {
378378
; SSE2: # %bb.0: # %entry
379379
; SSE2-NEXT: movdqa %xmm0, %xmm2
380380
; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
381-
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [117,117,117,117,117,117,117,117]
381+
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [117,u,117,u,117,u,117,u,117,u,117,u,117,u,117,u]
382382
; SSE2-NEXT: pmullw %xmm3, %xmm2
383383
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
384384
; SSE2-NEXT: pand %xmm4, %xmm2
@@ -729,7 +729,7 @@ define <64 x i8> @mul_v64i8c(<64 x i8> %i) nounwind {
729729
; SSE2: # %bb.0: # %entry
730730
; SSE2-NEXT: movdqa %xmm0, %xmm6
731731
; SSE2-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
732-
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [117,117,117,117,117,117,117,117]
732+
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [117,u,117,u,117,u,117,u,117,u,117,u,117,u,117,u]
733733
; SSE2-NEXT: pmullw %xmm4, %xmm6
734734
; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255]
735735
; SSE2-NEXT: pand %xmm5, %xmm6

llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2213,12 +2213,12 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) {
22132213
; CHECK-SSE2-NEXT: movq %rdi, %rax
22142214
; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm5
22152215
; CHECK-SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
2216-
; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5 # [9,0,41,183,1,1,161,221]
2216+
; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5 # [9,u,0,u,41,u,183,u,1,u,1,u,161,u,221,u]
22172217
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
22182218
; CHECK-SSE2-NEXT: pand %xmm4, %xmm5
22192219
; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm6
22202220
; CHECK-SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2221-
; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6 # [171,103,183,171,61,1,127,183]
2221+
; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6 # [171,u,103,u,183,u,171,u,61,u,1,u,127,u,183,u]
22222222
; CHECK-SSE2-NEXT: pand %xmm4, %xmm6
22232223
; CHECK-SSE2-NEXT: packuswb %xmm5, %xmm6
22242224
; CHECK-SSE2-NEXT: paddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6
@@ -2242,10 +2242,10 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) {
22422242
; CHECK-SSE2-NEXT: por %xmm7, %xmm5
22432243
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1
22442244
; CHECK-SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
2245-
; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [223,223,205,183,161,1,171,239]
2245+
; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [223,u,223,u,205,u,183,u,161,u,1,u,171,u,239,u]
22462246
; CHECK-SSE2-NEXT: pand %xmm4, %xmm1
22472247
; CHECK-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2248-
; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [197,205,27,241,1,1,1,163]
2248+
; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [197,u,205,u,27,u,241,u,1,u,1,u,1,u,163,u]
22492249
; CHECK-SSE2-NEXT: pand %xmm4, %xmm0
22502250
; CHECK-SSE2-NEXT: packuswb %xmm1, %xmm0
22512251
; CHECK-SSE2-NEXT: paddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0

llvm/test/CodeGen/X86/vector-fshr-128.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1989,11 +1989,11 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
19891989
; SSE2-NEXT: paddb %xmm0, %xmm0
19901990
; SSE2-NEXT: movdqa %xmm0, %xmm2
19911991
; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1992-
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [128,1,2,4,8,16,32,64]
1992+
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [128,u,1,u,2,u,4,u,8,u,16,u,32,u,64,u]
19931993
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
19941994
; SSE2-NEXT: pand %xmm3, %xmm2
19951995
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1996-
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [128,64,32,16,8,4,2,1]
1996+
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [128,u,64,u,32,u,16,u,8,u,4,u,2,u,1,u]
19971997
; SSE2-NEXT: pand %xmm3, %xmm0
19981998
; SSE2-NEXT: packuswb %xmm2, %xmm0
19991999
; SSE2-NEXT: por %xmm1, %xmm0
@@ -2149,11 +2149,11 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
21492149
; X86-SSE2-NEXT: paddb %xmm0, %xmm0
21502150
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
21512151
; X86-SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
2152-
; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 # [128,1,2,4,8,16,32,64]
2152+
; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 # [128,u,1,u,2,u,4,u,8,u,16,u,32,u,64,u]
21532153
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
21542154
; X86-SSE2-NEXT: pand %xmm3, %xmm2
21552155
; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2156-
; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [128,64,32,16,8,4,2,1]
2156+
; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [128,u,64,u,32,u,16,u,8,u,4,u,2,u,1,u]
21572157
; X86-SSE2-NEXT: pand %xmm3, %xmm0
21582158
; X86-SSE2-NEXT: packuswb %xmm2, %xmm0
21592159
; X86-SSE2-NEXT: por %xmm1, %xmm0

llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -691,11 +691,11 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind {
691691
; SSE2-NEXT: psubb %xmm2, %xmm1
692692
; SSE2-NEXT: movdqa %xmm1, %xmm2
693693
; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
694-
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [14,13,12,11,10,9,9,7]
694+
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [14,u,13,u,12,u,11,u,10,u,9,u,9,u,7,u]
695695
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
696696
; SSE2-NEXT: pand %xmm3, %xmm2
697697
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
698-
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [7,8,9,10,11,12,13,14]
698+
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [7,u,8,u,9,u,10,u,11,u,12,u,13,u,14,u]
699699
; SSE2-NEXT: pand %xmm3, %xmm1
700700
; SSE2-NEXT: packuswb %xmm2, %xmm1
701701
; SSE2-NEXT: psubb %xmm1, %xmm0

llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -787,13 +787,13 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind {
787787
; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm1[8],xmm3[9],xmm1[9],xmm3[10],xmm1[10],xmm3[11],xmm1[11],xmm3[12],xmm1[12],xmm3[13],xmm1[13],xmm3[14],xmm1[14],xmm3[15],xmm1[15]
788788
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [64,64,32,32,32,128,128,64]
789789
; SSE2-NEXT: psrlw $8, %xmm3
790-
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [14,13,12,11,10,9,9,7]
790+
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [14,u,13,u,12,u,11,u,10,u,9,u,9,u,7,u]
791791
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
792792
; SSE2-NEXT: pand %xmm4, %xmm3
793793
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
794794
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [64,256,128,32,32,32,64,64]
795795
; SSE2-NEXT: psrlw $8, %xmm2
796-
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [7,8,9,10,11,12,13,14]
796+
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [7,u,8,u,9,u,10,u,11,u,12,u,13,u,14,u]
797797
; SSE2-NEXT: pand %xmm4, %xmm2
798798
; SSE2-NEXT: packuswb %xmm3, %xmm2
799799
; SSE2-NEXT: psubb %xmm2, %xmm0

llvm/test/CodeGen/X86/vector-mul.ll

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -250,7 +250,7 @@ define <16 x i8> @mul_v16i8_1_2_4_8_1_2_4_8_1_2_4_8_1_2_4_8(<16 x i8> %a0) nounw
250250
; SSE2: # %bb.0:
251251
; SSE2-NEXT: movdqa %xmm0, %xmm1
252252
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
253-
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,1,2,4,8]
253+
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,u,2,u,4,u,8,u,1,u,2,u,4,u,8,u]
254254
; SSE2-NEXT: pmullw %xmm2, %xmm1
255255
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
256256
; SSE2-NEXT: pand %xmm3, %xmm1
@@ -1058,11 +1058,11 @@ define <16 x i8> @mul_v16i8_2_3_9_17_33_65_129_2_3_9_17_33_65_129_2_3(<16 x i8>
10581058
; X86-SSE2: # %bb.0:
10591059
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
10601060
; X86-SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1061-
; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [3,9,17,33,65,129,2,3]
1061+
; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [3,u,9,u,17,u,33,u,65,u,129,u,2,u,3,u]
10621062
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
10631063
; X86-SSE2-NEXT: pand %xmm2, %xmm1
10641064
; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1065-
; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [2,3,9,17,33,65,129,2]
1065+
; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [2,u,3,u,9,u,17,u,33,u,65,u,129,u,2,u]
10661066
; X86-SSE2-NEXT: pand %xmm2, %xmm0
10671067
; X86-SSE2-NEXT: packuswb %xmm1, %xmm0
10681068
; X86-SSE2-NEXT: retl
@@ -1081,11 +1081,11 @@ define <16 x i8> @mul_v16i8_2_3_9_17_33_65_129_2_3_9_17_33_65_129_2_3(<16 x i8>
10811081
; X64-SSE2: # %bb.0:
10821082
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
10831083
; X64-SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1084-
; X64-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3,9,17,33,65,129,2,3]
1084+
; X64-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3,u,9,u,17,u,33,u,65,u,129,u,2,u,3,u]
10851085
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
10861086
; X64-SSE2-NEXT: pand %xmm2, %xmm1
10871087
; X64-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1088-
; X64-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,3,9,17,33,65,129,2]
1088+
; X64-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,u,3,u,9,u,17,u,33,u,65,u,129,u,2,u]
10891089
; X64-SSE2-NEXT: pand %xmm2, %xmm0
10901090
; X64-SSE2-NEXT: packuswb %xmm1, %xmm0
10911091
; X64-SSE2-NEXT: retq
@@ -1832,7 +1832,7 @@ define <16 x i8> @mul_v16i8_0_1_3_7_15_31_63_127_0_1_3_7_15_31_63_127(<16 x i8>
18321832
; SSE2: # %bb.0:
18331833
; SSE2-NEXT: movdqa %xmm0, %xmm1
18341834
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1835-
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,1,3,7,15,31,63,127]
1835+
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,u,1,u,3,u,7,u,15,u,31,u,63,u,127,u]
18361836
; SSE2-NEXT: pmullw %xmm2, %xmm1
18371837
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
18381838
; SSE2-NEXT: pand %xmm3, %xmm1

llvm/test/CodeGen/X86/vector-shift-shl-128.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1151,11 +1151,11 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
11511151
; SSE2: # %bb.0:
11521152
; SSE2-NEXT: movdqa %xmm0, %xmm1
11531153
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1154-
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [128,64,32,16,8,4,2,1]
1154+
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [128,u,64,u,32,u,16,u,8,u,4,u,2,u,1,u]
11551155
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
11561156
; SSE2-NEXT: pand %xmm2, %xmm1
11571157
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1158-
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2,4,8,16,32,64,128]
1158+
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,u,2,u,4,u,8,u,16,u,32,u,64,u,128,u]
11591159
; SSE2-NEXT: pand %xmm2, %xmm0
11601160
; SSE2-NEXT: packuswb %xmm1, %xmm0
11611161
; SSE2-NEXT: retq
@@ -1232,11 +1232,11 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
12321232
; X86-SSE: # %bb.0:
12331233
; X86-SSE-NEXT: movdqa %xmm0, %xmm1
12341234
; X86-SSE-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1235-
; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [128,64,32,16,8,4,2,1]
1235+
; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [128,u,64,u,32,u,16,u,8,u,4,u,2,u,1,u]
12361236
; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
12371237
; X86-SSE-NEXT: pand %xmm2, %xmm1
12381238
; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1239-
; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [1,2,4,8,16,32,64,128]
1239+
; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [1,u,2,u,4,u,8,u,16,u,32,u,64,u,128,u]
12401240
; X86-SSE-NEXT: pand %xmm2, %xmm0
12411241
; X86-SSE-NEXT: packuswb %xmm1, %xmm0
12421242
; X86-SSE-NEXT: retl

0 commit comments

Comments
 (0)