@@ -73,17 +73,20 @@ define void @program_1(ptr %dest, ptr %t0, <4 x float> %p0, <4 x float> %p1, <4
7373; X86-NEXT: xorl %esi, %esi
7474; X86-NEXT: xorps %xmm3, %xmm3
7575; X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
76- ; X86-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload
76+ ; X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload
7777; X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload
7878; X86-NEXT: calll *%esi
7979; X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
8080; X86-NEXT: minps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
8181; X86-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
82- ; X86-NEXT: pxor %xmm1, %xmm1
83- ; X86-NEXT: psubd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Folded Reload
82+ ; X86-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload
83+ ; X86-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
8484; X86-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
85- ; X86-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
85+ ; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
8686; X86-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
87+ ; X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
88+ ; X86-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
89+ ; X86-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
8790; X86-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
8891; X86-NEXT: por %xmm1, %xmm0
8992; X86-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
@@ -108,10 +111,8 @@ define void @program_1(ptr %dest, ptr %t0, <4 x float> %p0, <4 x float> %p1, <4
108111; X64-NEXT: retq
109112; X64-NEXT: LBB0_3: ## %forbody
110113; X64-NEXT: pushq %rbx
111- ; X64-NEXT: subq $64, %rsp
112- ; X64-NEXT: xorps %xmm0, %xmm0
113- ; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
114- ; X64-NEXT: movaps {{.*#+}} xmm1 = [1.28E+2,1.28E+2,1.28E+2,1.28E+2]
114+ ; X64-NEXT: subq $48, %rsp
115+ ; X64-NEXT: movaps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
115116; X64-NEXT: minps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
116117; X64-NEXT: cvttps2dq %xmm1, %xmm0
117118; X64-NEXT: cvtdq2ps %xmm0, %xmm0
@@ -162,17 +163,19 @@ define void @program_1(ptr %dest, ptr %t0, <4 x float> %p0, <4 x float> %p1, <4
162163; X64-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload
163164; X64-NEXT: minps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
164165; X64-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill
166+ ; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
167+ ; X64-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
165168; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
166- ; X64-NEXT: psubd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload
167- ; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
168- ; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
169- ; X64-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
169+ ; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
170170; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
171171; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
172- ; X64-NEXT: orps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload
172+ ; X64-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
173173; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
174- ; X64-NEXT: xorps %xmm3, %xmm3
175- ; X64-NEXT: xorps %xmm4, %xmm4
174+ ; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
175+ ; X64-NEXT: por %xmm1, %xmm0
176+ ; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
177+ ; X64-NEXT: xorps %xmm3, %xmm3
178+ ; X64-NEXT: xorps %xmm4, %xmm4
176179; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
177180; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
178181; X64-NEXT: movaps (%rsp), %xmm2 ## 16-byte Reload
0 commit comments