Skip to content

Commit bf60bae

Browse files
rotaterighttstellar
authored and committed
Revert "[x86] try harder to scalarize a vector load with extracted integer op uses"
This reverts commit b4b97ec. As discussed in post-commit feedback at: https://reviews.llvm.org/D118376 ...there's a stage 2 failure on a Mac running a clang-refactor tool test. (cherry picked from commit 7b03725)
1 parent 54a8365 commit bf60bae

11 files changed

+597
-410
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 0 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -43108,38 +43108,6 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
4310843108
}
4310943109
}
4311043110

43111-
// If this extract is from a loaded vector value and will be used as an
43112-
// integer, that requires a potentially expensive XMM -> GPR transfer.
43113-
// Additionally, if we can convert to a scalar integer load, that will likely
43114-
// be folded into a subsequent integer op.
43115-
// Note: Unlike the related fold for this in DAGCombiner, this is not limited
43116-
// to a single-use of the loaded vector. For the reasons above, we
43117-
// expect this to be profitable even if it creates an extra load.
43118-
bool LikelyUsedAsVector = any_of(N->uses(), [](SDNode *Use) {
43119-
return Use->getOpcode() == ISD::STORE ||
43120-
Use->getOpcode() == ISD::INSERT_VECTOR_ELT ||
43121-
Use->getOpcode() == ISD::SCALAR_TO_VECTOR;
43122-
});
43123-
auto *LoadVec = dyn_cast<LoadSDNode>(InputVector);
43124-
if (LoadVec && CIdx && ISD::isNormalLoad(LoadVec) && VT.isInteger() &&
43125-
SrcVT.getVectorElementType() == VT && DCI.isAfterLegalizeDAG() &&
43126-
!LikelyUsedAsVector) {
43127-
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
43128-
SDValue NewPtr =
43129-
TLI.getVectorElementPointer(DAG, LoadVec->getBasePtr(), SrcVT, EltIdx);
43130-
unsigned PtrOff = VT.getSizeInBits() * CIdx->getZExtValue() / 8;
43131-
MachinePointerInfo MPI = LoadVec->getPointerInfo().getWithOffset(PtrOff);
43132-
Align Alignment = commonAlignment(LoadVec->getAlign(), PtrOff);
43133-
SDValue Load =
43134-
DAG.getLoad(VT, dl, LoadVec->getChain(), NewPtr, MPI, Alignment,
43135-
LoadVec->getMemOperand()->getFlags(), LoadVec->getAAInfo());
43136-
SDValue Chain = Load.getValue(1);
43137-
SDValue From[] = {SDValue(N, 0), SDValue(LoadVec, 1)};
43138-
SDValue To[] = {Load, Chain};
43139-
DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
43140-
return SDValue(N, 0);
43141-
}
43142-
4314343111
return SDValue();
4314443112
}
4314543113

llvm/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,13 @@
1010
define <4 x i32> @test(<4 x i32>* %p) {
1111
; CHECK-LABEL: test:
1212
; CHECK: # %bb.0:
13-
; CHECK-NEXT: cmpl $3, 8(%rdi)
14-
; CHECK-NEXT: je .LBB0_1
15-
; CHECK-NEXT: # %bb.2:
16-
; CHECK-NEXT: xorps %xmm0, %xmm0
17-
; CHECK-NEXT: retq
18-
; CHECK-NEXT: .LBB0_1:
1913
; CHECK-NEXT: movaps (%rdi), %xmm0
14+
; CHECK-NEXT: extractps $2, %xmm0, %eax
15+
; CHECK-NEXT: cmpl $3, %eax
16+
; CHECK-NEXT: je .LBB0_2
17+
; CHECK-NEXT: # %bb.1:
18+
; CHECK-NEXT: xorps %xmm0, %xmm0
19+
; CHECK-NEXT: .LBB0_2:
2020
; CHECK-NEXT: retq
2121
%v = load <4 x i32>, <4 x i32>* %p
2222
%e = extractelement <4 x i32> %v, i32 2

llvm/test/CodeGen/X86/avx512-cvt.ll

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -148,12 +148,18 @@ define <2 x float> @sltof2f32(<2 x i64> %a) {
148148
define <4 x float> @slto4f32_mem(<4 x i64>* %a) {
149149
; NODQ-LABEL: slto4f32_mem:
150150
; NODQ: # %bb.0:
151-
; NODQ-NEXT: vcvtsi2ssq 8(%rdi), %xmm0, %xmm0
152-
; NODQ-NEXT: vcvtsi2ssq (%rdi), %xmm1, %xmm1
153-
; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
154-
; NODQ-NEXT: vcvtsi2ssq 16(%rdi), %xmm2, %xmm1
155-
; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
156-
; NODQ-NEXT: vcvtsi2ssq 24(%rdi), %xmm2, %xmm1
151+
; NODQ-NEXT: vmovdqu (%rdi), %xmm0
152+
; NODQ-NEXT: vmovdqu 16(%rdi), %xmm1
153+
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
154+
; NODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
155+
; NODQ-NEXT: vmovq %xmm0, %rax
156+
; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
157+
; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
158+
; NODQ-NEXT: vmovq %xmm1, %rax
159+
; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
160+
; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
161+
; NODQ-NEXT: vpextrq $1, %xmm1, %rax
162+
; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1
157163
; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
158164
; NODQ-NEXT: retq
159165
;

llvm/test/CodeGen/X86/bitcast-vector-bool.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -542,8 +542,10 @@ define i32 @bitcast_v64i8_to_v2i32(<64 x i8> %a0) nounwind {
542542
; AVX512: # %bb.0:
543543
; AVX512-NEXT: vpmovb2m %zmm0, %k0
544544
; AVX512-NEXT: kmovq %k0, -{{[0-9]+}}(%rsp)
545-
; AVX512-NEXT: movl -{{[0-9]+}}(%rsp), %eax
546-
; AVX512-NEXT: addl -{{[0-9]+}}(%rsp), %eax
545+
; AVX512-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm0
546+
; AVX512-NEXT: vmovd %xmm0, %ecx
547+
; AVX512-NEXT: vpextrd $1, %xmm0, %eax
548+
; AVX512-NEXT: addl %ecx, %eax
547549
; AVX512-NEXT: vzeroupper
548550
; AVX512-NEXT: retq
549551
%1 = icmp slt <64 x i8> %a0, zeroinitializer

llvm/test/CodeGen/X86/extractelement-load.ll

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -301,35 +301,33 @@ define void @subextract_broadcast_load_constant(<2 x i16>* nocapture %0, i16* no
301301
ret void
302302
}
303303

304-
; A scalar load is favored over a XMM->GPR register transfer in this example.
305-
306304
define i32 @multi_use_load_scalarization(<4 x i32>* %p) {
307305
; X32-SSE2-LABEL: multi_use_load_scalarization:
308306
; X32-SSE2: # %bb.0:
309307
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
310-
; X32-SSE2-NEXT: movl (%ecx), %eax
311308
; X32-SSE2-NEXT: movdqu (%ecx), %xmm0
312309
; X32-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
310+
; X32-SSE2-NEXT: movd %xmm0, %eax
313311
; X32-SSE2-NEXT: psubd %xmm1, %xmm0
314312
; X32-SSE2-NEXT: movdqa %xmm0, (%ecx)
315313
; X32-SSE2-NEXT: retl
316314
;
317315
; X64-SSSE3-LABEL: multi_use_load_scalarization:
318316
; X64-SSSE3: # %bb.0:
319-
; X64-SSSE3-NEXT: movl (%rdi), %eax
320317
; X64-SSSE3-NEXT: movdqu (%rdi), %xmm0
321318
; X64-SSSE3-NEXT: pcmpeqd %xmm1, %xmm1
319+
; X64-SSSE3-NEXT: movd %xmm0, %eax
322320
; X64-SSSE3-NEXT: psubd %xmm1, %xmm0
323321
; X64-SSSE3-NEXT: movdqa %xmm0, (%rdi)
324322
; X64-SSSE3-NEXT: retq
325323
;
326324
; X64-AVX-LABEL: multi_use_load_scalarization:
327325
; X64-AVX: # %bb.0:
328-
; X64-AVX-NEXT: movl (%rdi), %eax
329326
; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
330327
; X64-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
331-
; X64-AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
332-
; X64-AVX-NEXT: vmovdqa %xmm0, (%rdi)
328+
; X64-AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm1
329+
; X64-AVX-NEXT: vmovdqa %xmm1, (%rdi)
330+
; X64-AVX-NEXT: vmovd %xmm0, %eax
333331
; X64-AVX-NEXT: retq
334332
%v = load <4 x i32>, <4 x i32>* %p, align 1
335333
%v1 = add <4 x i32> %v, <i32 1, i32 1, i32 1, i32 1>

llvm/test/CodeGen/X86/oddsubvector.ll

Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -161,46 +161,46 @@ define <16 x i32> @PR42819(<8 x i32>* %a0) {
161161
define void @PR42833() {
162162
; SSE2-LABEL: PR42833:
163163
; SSE2: # %bb.0:
164-
; SSE2-NEXT: movl b(%rip), %eax
165-
; SSE2-NEXT: movdqa c+144(%rip), %xmm0
166-
; SSE2-NEXT: movdqa c+128(%rip), %xmm1
167-
; SSE2-NEXT: addl c+128(%rip), %eax
164+
; SSE2-NEXT: movdqa c+144(%rip), %xmm1
165+
; SSE2-NEXT: movdqa c+128(%rip), %xmm0
166+
; SSE2-NEXT: movd %xmm0, %eax
167+
; SSE2-NEXT: addl b(%rip), %eax
168168
; SSE2-NEXT: movd %eax, %xmm2
169169
; SSE2-NEXT: movd %eax, %xmm3
170-
; SSE2-NEXT: paddd %xmm1, %xmm3
170+
; SSE2-NEXT: paddd %xmm0, %xmm3
171171
; SSE2-NEXT: movdqa d+144(%rip), %xmm4
172-
; SSE2-NEXT: psubd %xmm0, %xmm4
173-
; SSE2-NEXT: paddd %xmm0, %xmm0
174-
; SSE2-NEXT: movdqa %xmm1, %xmm5
175-
; SSE2-NEXT: paddd %xmm1, %xmm5
172+
; SSE2-NEXT: psubd %xmm1, %xmm4
173+
; SSE2-NEXT: paddd %xmm1, %xmm1
174+
; SSE2-NEXT: movdqa %xmm0, %xmm5
175+
; SSE2-NEXT: paddd %xmm0, %xmm5
176176
; SSE2-NEXT: movss {{.*#+}} xmm5 = xmm3[0],xmm5[1,2,3]
177-
; SSE2-NEXT: movdqa %xmm0, c+144(%rip)
177+
; SSE2-NEXT: movdqa %xmm1, c+144(%rip)
178178
; SSE2-NEXT: movaps %xmm5, c+128(%rip)
179-
; SSE2-NEXT: movdqa c+160(%rip), %xmm0
179+
; SSE2-NEXT: movdqa c+160(%rip), %xmm1
180180
; SSE2-NEXT: movdqa c+176(%rip), %xmm3
181181
; SSE2-NEXT: movdqa d+160(%rip), %xmm5
182182
; SSE2-NEXT: movdqa d+176(%rip), %xmm6
183183
; SSE2-NEXT: movdqa d+128(%rip), %xmm7
184-
; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3]
185-
; SSE2-NEXT: psubd %xmm1, %xmm7
184+
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
185+
; SSE2-NEXT: psubd %xmm0, %xmm7
186186
; SSE2-NEXT: psubd %xmm3, %xmm6
187-
; SSE2-NEXT: psubd %xmm0, %xmm5
187+
; SSE2-NEXT: psubd %xmm1, %xmm5
188188
; SSE2-NEXT: movdqa %xmm5, d+160(%rip)
189189
; SSE2-NEXT: movdqa %xmm6, d+176(%rip)
190190
; SSE2-NEXT: movdqa %xmm4, d+144(%rip)
191191
; SSE2-NEXT: movdqa %xmm7, d+128(%rip)
192192
; SSE2-NEXT: paddd %xmm3, %xmm3
193-
; SSE2-NEXT: paddd %xmm0, %xmm0
194-
; SSE2-NEXT: movdqa %xmm0, c+160(%rip)
193+
; SSE2-NEXT: paddd %xmm1, %xmm1
194+
; SSE2-NEXT: movdqa %xmm1, c+160(%rip)
195195
; SSE2-NEXT: movdqa %xmm3, c+176(%rip)
196196
; SSE2-NEXT: retq
197197
;
198198
; SSE42-LABEL: PR42833:
199199
; SSE42: # %bb.0:
200-
; SSE42-NEXT: movl b(%rip), %eax
201200
; SSE42-NEXT: movdqa c+144(%rip), %xmm0
202201
; SSE42-NEXT: movdqa c+128(%rip), %xmm1
203-
; SSE42-NEXT: addl c+128(%rip), %eax
202+
; SSE42-NEXT: movd %xmm1, %eax
203+
; SSE42-NEXT: addl b(%rip), %eax
204204
; SSE42-NEXT: movd %eax, %xmm2
205205
; SSE42-NEXT: paddd %xmm1, %xmm2
206206
; SSE42-NEXT: movdqa d+144(%rip), %xmm3
@@ -232,20 +232,20 @@ define void @PR42833() {
232232
;
233233
; AVX1-LABEL: PR42833:
234234
; AVX1: # %bb.0:
235-
; AVX1-NEXT: movl b(%rip), %eax
236-
; AVX1-NEXT: addl c+128(%rip), %eax
237-
; AVX1-NEXT: vmovd %eax, %xmm0
238-
; AVX1-NEXT: vmovdqa c+128(%rip), %xmm1
239-
; AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm0
240-
; AVX1-NEXT: vpaddd %xmm1, %xmm1, %xmm2
235+
; AVX1-NEXT: vmovdqa c+128(%rip), %xmm0
236+
; AVX1-NEXT: vmovd %xmm0, %eax
237+
; AVX1-NEXT: addl b(%rip), %eax
238+
; AVX1-NEXT: vmovd %eax, %xmm1
239+
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
240+
; AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm2
241241
; AVX1-NEXT: vmovdqa c+144(%rip), %xmm3
242242
; AVX1-NEXT: vpaddd %xmm3, %xmm3, %xmm3
243243
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
244-
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7]
244+
; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3,4,5,6,7]
245245
; AVX1-NEXT: vmovdqa d+144(%rip), %xmm2
246246
; AVX1-NEXT: vpsubd c+144(%rip), %xmm2, %xmm2
247-
; AVX1-NEXT: vmovups %ymm0, c+128(%rip)
248-
; AVX1-NEXT: vpinsrd $0, %eax, %xmm1, %xmm0
247+
; AVX1-NEXT: vmovups %ymm1, c+128(%rip)
248+
; AVX1-NEXT: vpinsrd $0, %eax, %xmm0, %xmm0
249249
; AVX1-NEXT: vmovdqa d+128(%rip), %xmm1
250250
; AVX1-NEXT: vpsubd %xmm0, %xmm1, %xmm0
251251
; AVX1-NEXT: vmovdqa d+176(%rip), %xmm1
@@ -314,20 +314,20 @@ define void @PR42833() {
314314
;
315315
; XOP-LABEL: PR42833:
316316
; XOP: # %bb.0:
317-
; XOP-NEXT: movl b(%rip), %eax
318-
; XOP-NEXT: addl c+128(%rip), %eax
319-
; XOP-NEXT: vmovd %eax, %xmm0
320-
; XOP-NEXT: vmovdqa c+128(%rip), %xmm1
321-
; XOP-NEXT: vpaddd %xmm0, %xmm1, %xmm0
322-
; XOP-NEXT: vpaddd %xmm1, %xmm1, %xmm2
317+
; XOP-NEXT: vmovdqa c+128(%rip), %xmm0
318+
; XOP-NEXT: vmovd %xmm0, %eax
319+
; XOP-NEXT: addl b(%rip), %eax
320+
; XOP-NEXT: vmovd %eax, %xmm1
321+
; XOP-NEXT: vpaddd %xmm1, %xmm0, %xmm1
322+
; XOP-NEXT: vpaddd %xmm0, %xmm0, %xmm2
323323
; XOP-NEXT: vmovdqa c+144(%rip), %xmm3
324324
; XOP-NEXT: vpaddd %xmm3, %xmm3, %xmm3
325325
; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
326-
; XOP-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7]
326+
; XOP-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3,4,5,6,7]
327327
; XOP-NEXT: vmovdqa d+144(%rip), %xmm2
328328
; XOP-NEXT: vpsubd c+144(%rip), %xmm2, %xmm2
329-
; XOP-NEXT: vmovups %ymm0, c+128(%rip)
330-
; XOP-NEXT: vpinsrd $0, %eax, %xmm1, %xmm0
329+
; XOP-NEXT: vmovups %ymm1, c+128(%rip)
330+
; XOP-NEXT: vpinsrd $0, %eax, %xmm0, %xmm0
331331
; XOP-NEXT: vmovdqa d+128(%rip), %xmm1
332332
; XOP-NEXT: vpsubd %xmm0, %xmm1, %xmm0
333333
; XOP-NEXT: vmovdqa d+176(%rip), %xmm1

llvm/test/CodeGen/X86/pr45378.ll

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -76,23 +76,28 @@ define i1 @parseHeaders2_scalar_and(i64 * %ptr) nounwind {
7676
; SSE2-LABEL: parseHeaders2_scalar_and:
7777
; SSE2: # %bb.0:
7878
; SSE2-NEXT: movdqu (%rdi), %xmm0
79-
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
8079
; SSE2-NEXT: movq %xmm0, %rax
81-
; SSE2-NEXT: testq %rax, (%rdi)
80+
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
81+
; SSE2-NEXT: movq %xmm0, %rcx
82+
; SSE2-NEXT: testq %rcx, %rax
8283
; SSE2-NEXT: sete %al
8384
; SSE2-NEXT: retq
8485
;
8586
; SSE41-LABEL: parseHeaders2_scalar_and:
8687
; SSE41: # %bb.0:
87-
; SSE41-NEXT: movq (%rdi), %rax
88-
; SSE41-NEXT: testq %rax, 8(%rdi)
88+
; SSE41-NEXT: movdqu (%rdi), %xmm0
89+
; SSE41-NEXT: movq %xmm0, %rax
90+
; SSE41-NEXT: pextrq $1, %xmm0, %rcx
91+
; SSE41-NEXT: testq %rcx, %rax
8992
; SSE41-NEXT: sete %al
9093
; SSE41-NEXT: retq
9194
;
9295
; AVX-LABEL: parseHeaders2_scalar_and:
9396
; AVX: # %bb.0:
94-
; AVX-NEXT: movq (%rdi), %rax
95-
; AVX-NEXT: testq %rax, 8(%rdi)
97+
; AVX-NEXT: vmovdqu (%rdi), %xmm0
98+
; AVX-NEXT: vmovq %xmm0, %rax
99+
; AVX-NEXT: vpextrq $1, %xmm0, %rcx
100+
; AVX-NEXT: testq %rcx, %rax
96101
; AVX-NEXT: sete %al
97102
; AVX-NEXT: retq
98103
%vptr = bitcast i64 * %ptr to <2 x i64> *

llvm/test/CodeGen/X86/scalar_widen_div.ll

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -403,29 +403,32 @@ define void @test_int_div(<3 x i32>* %dest, <3 x i32>* %old, i32 %n) {
403403
; CHECK-NEXT: testl %edx, %edx
404404
; CHECK-NEXT: jle .LBB12_3
405405
; CHECK-NEXT: # %bb.1: # %bb.nph
406-
; CHECK-NEXT: movl %edx, %r11d
406+
; CHECK-NEXT: movl %edx, %r9d
407407
; CHECK-NEXT: xorl %ecx, %ecx
408408
; CHECK-NEXT: .p2align 4, 0x90
409409
; CHECK-NEXT: .LBB12_2: # %for.body
410410
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
411-
; CHECK-NEXT: movl 8(%rdi,%rcx), %r8d
412-
; CHECK-NEXT: movl (%rdi,%rcx), %r9d
413-
; CHECK-NEXT: movl 4(%rdi,%rcx), %eax
411+
; CHECK-NEXT: movdqa (%rdi,%rcx), %xmm0
412+
; CHECK-NEXT: movdqa (%rsi,%rcx), %xmm1
413+
; CHECK-NEXT: pextrd $1, %xmm0, %eax
414+
; CHECK-NEXT: pextrd $1, %xmm1, %r8d
414415
; CHECK-NEXT: cltd
415-
; CHECK-NEXT: idivl 4(%rsi,%rcx)
416-
; CHECK-NEXT: movl %eax, %r10d
417-
; CHECK-NEXT: movl %r9d, %eax
416+
; CHECK-NEXT: idivl %r8d
417+
; CHECK-NEXT: movl %eax, %r8d
418+
; CHECK-NEXT: movd %xmm0, %eax
419+
; CHECK-NEXT: movd %xmm1, %r10d
418420
; CHECK-NEXT: cltd
419-
; CHECK-NEXT: idivl (%rsi,%rcx)
420-
; CHECK-NEXT: movd %eax, %xmm0
421-
; CHECK-NEXT: pinsrd $1, %r10d, %xmm0
422-
; CHECK-NEXT: movl %r8d, %eax
421+
; CHECK-NEXT: idivl %r10d
422+
; CHECK-NEXT: movd %eax, %xmm2
423+
; CHECK-NEXT: pinsrd $1, %r8d, %xmm2
424+
; CHECK-NEXT: pextrd $2, %xmm0, %eax
425+
; CHECK-NEXT: pextrd $2, %xmm1, %r8d
423426
; CHECK-NEXT: cltd
424-
; CHECK-NEXT: idivl 8(%rsi,%rcx)
427+
; CHECK-NEXT: idivl %r8d
425428
; CHECK-NEXT: movl %eax, 8(%rdi,%rcx)
426-
; CHECK-NEXT: movq %xmm0, (%rdi,%rcx)
429+
; CHECK-NEXT: movq %xmm2, (%rdi,%rcx)
427430
; CHECK-NEXT: addq $16, %rcx
428-
; CHECK-NEXT: decl %r11d
431+
; CHECK-NEXT: decl %r9d
429432
; CHECK-NEXT: jne .LBB12_2
430433
; CHECK-NEXT: .LBB12_3: # %for.end
431434
; CHECK-NEXT: retq

0 commit comments

Comments
 (0)