@@ -277,56 +277,56 @@ define <8 x half> @fmul_pow2_8xhalf(<8 x i16> %i) {
277277; CHECK-AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
278278; CHECK-AVX2-NEXT: vzeroupper
279279; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
280- ; CHECK-AVX2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
280+ ; CHECK-AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
281281; CHECK-AVX2-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
282282; CHECK-AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
283283; CHECK-AVX2-NEXT: vzeroupper
284284; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
285- ; CHECK-AVX2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
285+ ; CHECK-AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
286286; CHECK-AVX2-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
287287; CHECK-AVX2-NEXT: # xmm0 = mem[1,0]
288288; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
289- ; CHECK-AVX2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
289+ ; CHECK-AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
290290; CHECK-AVX2-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
291291; CHECK-AVX2-NEXT: # xmm0 = mem[3,3,3,3]
292292; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
293- ; CHECK-AVX2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
293+ ; CHECK-AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
294294; CHECK-AVX2-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
295295; CHECK-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0
296296; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
297297; CHECK-AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
298298; CHECK-AVX2-NEXT: vzeroupper
299299; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
300- ; CHECK-AVX2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
300+ ; CHECK-AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
301301; CHECK-AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
302302; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
303- ; CHECK-AVX2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
303+ ; CHECK-AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
304304; CHECK-AVX2-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
305305; CHECK-AVX2-NEXT: # xmm0 = mem[1,0]
306306; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
307- ; CHECK-AVX2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
307+ ; CHECK-AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
308308; CHECK-AVX2-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
309309; CHECK-AVX2-NEXT: # xmm0 = mem[3,3,3,3]
310310; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
311311; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT
312312; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
313313; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
314314; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
315- ; CHECK-AVX2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
315+ ; CHECK-AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
316316; CHECK-AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero
317317; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT
318318; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
319319; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
320320; CHECK-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
321321; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
322322; CHECK-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
323- ; CHECK-AVX2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
323+ ; CHECK-AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
324324; CHECK-AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero
325325; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT
326326; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
327327; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
328328; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
329- ; CHECK-AVX2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
329+ ; CHECK-AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
330330; CHECK-AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero
331331; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT
332332; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -336,27 +336,27 @@ define <8 x half> @fmul_pow2_8xhalf(<8 x i16> %i) {
336336; CHECK-AVX2-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
337337; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
338338; CHECK-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
339- ; CHECK-AVX2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
339+ ; CHECK-AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
340340; CHECK-AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero
341341; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT
342342; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
343343; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
344344; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
345- ; CHECK-AVX2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
345+ ; CHECK-AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
346346; CHECK-AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero
347347; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT
348348; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
349349; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
350350; CHECK-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
351351; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
352352; CHECK-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
353- ; CHECK-AVX2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
353+ ; CHECK-AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
354354; CHECK-AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero
355355; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT
356356; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
357357; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
358358; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
359- ; CHECK-AVX2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
359+ ; CHECK-AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
360360; CHECK-AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero
361361; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT
362362; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -1111,7 +1111,7 @@ define <2 x half> @fmul_pow_shl_cnt_vec_fail_to_large(<2 x i16> %cnt) nounwind {
11111111; CHECK-AVX2-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
11121112; CHECK-AVX2-NEXT: vzeroupper
11131113; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
1114- ; CHECK-AVX2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1114+ ; CHECK-AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
11151115; CHECK-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
11161116; CHECK-AVX2-NEXT: vpextrw $0, %xmm0, %eax
11171117; CHECK-AVX2-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
@@ -1121,7 +1121,7 @@ define <2 x half> @fmul_pow_shl_cnt_vec_fail_to_large(<2 x i16> %cnt) nounwind {
11211121; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
11221122; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
11231123; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1124- ; CHECK-AVX2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1124+ ; CHECK-AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
11251125; CHECK-AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero
11261126; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT
11271127; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
0 commit comments