Skip to content

Commit d197621

Browse files
author
Yuanke Luo
committed
[X86] Fix spill issue for fr16
When avx512fp16 is not available, we use MOVSS to spill fr16/fr16x registers. However, MOVSSmr requires the fr32 register class and MOVSSrm requires the vr128 register class, which causes the machine verifier to report a bad instruction. To fix the issue, this patch creates a pseudo instruction, MOVSHP, for fr16 register spilling. MOVSHP is expanded to MOVSS or VMOVSSZ depending on the register number.
1 parent 3134e69 commit d197621

File tree

8 files changed

+156
-29
lines changed

8 files changed

+156
-29
lines changed

llvm/lib/Target/X86/X86InstrInfo.cpp

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4399,13 +4399,8 @@ static unsigned getLoadStoreOpcodeForFP16(bool Load, const X86Subtarget &STI) {
43994399
if (STI.hasFP16())
44004400
return Load ? X86::VMOVSHZrm_alt : X86::VMOVSHZmr;
44014401
if (Load)
4402-
return STI.hasAVX512() ? X86::VMOVSSZrm
4403-
: STI.hasAVX() ? X86::VMOVSSrm
4404-
: X86::MOVSSrm;
4405-
else
4406-
return STI.hasAVX512() ? X86::VMOVSSZmr
4407-
: STI.hasAVX() ? X86::VMOVSSmr
4408-
: X86::MOVSSmr;
4402+
return X86::MOVSHPrm;
4403+
return X86::MOVSHPmr;
44094404
}
44104405

44114406
static unsigned getLoadStoreRegOpcode(Register Reg,
@@ -6131,6 +6126,25 @@ static bool expandSHXDROT(MachineInstrBuilder &MIB, const MCInstrDesc &Desc) {
61316126
return true;
61326127
}
61336128

6129+
// Expand the MOVSHPrm / MOVSHPmr spill pseudo into a real scalar-single move.
// The instruction is rewritten in place: only its descriptor is replaced via
// MIB->setDesc(); no operand is added, removed, or modified here.
//
// MIB  builder wrapping the instruction being expanded.
// MI   the pseudo; its opcode selects the load (rm) or store (mr) form.
// TII  used to look up the descriptor of the chosen opcode.
//
// Always returns true.
static bool expandMOVSHP(MachineInstrBuilder &MIB, MachineInstr &MI,
6130+
const TargetInstrInfo &TII) {
6131+
unsigned NewOpc;
6132+
if (MI.getOpcode() == X86::MOVSHPrm) {
6133+
// Reload: default to MOVSSrm.
// NOTE(review): the legacy-SSE MOVSS opcode is chosen here regardless of
// whether the subtarget has AVX — confirm that VMOVSS is not preferable on
// AVX targets.
NewOpc = X86::MOVSSrm;
6134+
// Operand 0 is the destination register of the load form.
Register Reg = MI.getOperand(0).getReg();
6135+
// Registers numbered above XMM15 switch to the AVX-512 VMOVSSZ form.
// NOTE(review): this relies on the ordering of the X86 register enum, and
// presumably on XMM16+ only being encodable by the Z variant — confirm.
if (Reg > X86::XMM15)
6136+
NewOpc = X86::VMOVSSZrm;
6137+
} else {
6138+
// Spill: default to MOVSSmr (same review note as for the reload path).
NewOpc = X86::MOVSSmr;
6139+
// Operand 5 is the stored register; it follows the operands of the memory
// reference in the store form.
Register Reg = MI.getOperand(5).getReg();
6140+
if (Reg > X86::XMM15)
6141+
NewOpc = X86::VMOVSSZmr;
6142+
}
6143+
6144+
// Swap the pseudo's descriptor for the selected real opcode.
MIB->setDesc(TII.get(NewOpc));
6145+
return true;
6146+
}
6147+
61346148
bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
61356149
bool HasAVX = Subtarget.hasAVX();
61366150
MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
@@ -6203,6 +6217,9 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
62036217
}
62046218
return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
62056219
}
6220+
case X86::MOVSHPmr:
6221+
case X86::MOVSHPrm:
6222+
return expandMOVSHP(MIB, MI, *this);
62066223
case X86::V_SETALLONES:
62076224
return Expand2AddrUndef(MIB,
62086225
get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));

llvm/lib/Target/X86/X86InstrSSE.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,18 @@ multiclass sse12_move_rm<RegisterClass RC, ValueType vt, X86MemOperand x86memop,
267267
}
268268
}
269269

270+
// Pseudo instructions for spilling and reloading FR16X registers through a
// 32-bit memory slot (f32mem). They carry no assembly string and no selection
// pattern; expandMOVSHP in X86InstrInfo.cpp rewrites them to MOVSS or VMOVSSZ.
271+
let isPseudo = 1, Predicates = [HasSSE2] in {
272+
// Store form: writes $src to the f32mem location $dst.
let mayStore = 1 in
273+
def MOVSHPmr : I<0, Pseudo, (outs), (ins f32mem:$dst, FR16X:$src), "",
274+
[], SSEPackedSingle>,
275+
Sched<[WriteFStore]>;
276+
// Load form: reads the f32mem location $src into $dst.
let mayLoad = 1 in
277+
def MOVSHPrm : I<0, Pseudo, (outs FR16X:$dst), (ins f32mem:$src), "",
278+
[], SSEPackedSingle>,
279+
Sched<[WriteFLoad]>;
280+
}
281+
270282
defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss",
271283
SSEPackedSingle, UseSSE1>, TB, XS;
272284
defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd",

llvm/test/CodeGen/X86/avx512fp16-combine-shuffle-fma.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@ define <2 x half> @foo(<2 x half> %0) nounwind {
1313
; AVX2-NEXT: callq __extendhfsf2@PLT
1414
; AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1515
; AVX2-NEXT: callq __truncsfhf2@PLT
16-
; AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
16+
; AVX2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1717
; AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1818
; AVX2-NEXT: callq __extendhfsf2@PLT
1919
; AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
20-
; AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
20+
; AVX2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
2121
; AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero
2222
; AVX2-NEXT: callq __extendhfsf2@PLT
2323
; AVX2-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload

llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,10 +100,10 @@ define half @complex_canonicalize_fmul_half(half %a, half %b) nounwind {
100100
; AVX-LABEL: complex_canonicalize_fmul_half:
101101
; AVX: # %bb.0: # %entry
102102
; AVX-NEXT: pushq %rax
103-
; AVX-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
103+
; AVX-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
104104
; AVX-NEXT: callq __extendhfsf2@PLT
105105
; AVX-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill
106-
; AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
106+
; AVX-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
107107
; AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
108108
; AVX-NEXT: callq __extendhfsf2@PLT
109109
; AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill

llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -277,56 +277,56 @@ define <8 x half> @fmul_pow2_8xhalf(<8 x i16> %i) {
277277
; CHECK-AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
278278
; CHECK-AVX2-NEXT: vzeroupper
279279
; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
280-
; CHECK-AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
280+
; CHECK-AVX2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
281281
; CHECK-AVX2-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
282282
; CHECK-AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
283283
; CHECK-AVX2-NEXT: vzeroupper
284284
; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
285-
; CHECK-AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
285+
; CHECK-AVX2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
286286
; CHECK-AVX2-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
287287
; CHECK-AVX2-NEXT: # xmm0 = mem[1,0]
288288
; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
289-
; CHECK-AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
289+
; CHECK-AVX2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
290290
; CHECK-AVX2-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
291291
; CHECK-AVX2-NEXT: # xmm0 = mem[3,3,3,3]
292292
; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
293-
; CHECK-AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
293+
; CHECK-AVX2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
294294
; CHECK-AVX2-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
295295
; CHECK-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0
296296
; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
297297
; CHECK-AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
298298
; CHECK-AVX2-NEXT: vzeroupper
299299
; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
300-
; CHECK-AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
300+
; CHECK-AVX2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
301301
; CHECK-AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
302302
; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
303-
; CHECK-AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
303+
; CHECK-AVX2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
304304
; CHECK-AVX2-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
305305
; CHECK-AVX2-NEXT: # xmm0 = mem[1,0]
306306
; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
307-
; CHECK-AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
307+
; CHECK-AVX2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
308308
; CHECK-AVX2-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
309309
; CHECK-AVX2-NEXT: # xmm0 = mem[3,3,3,3]
310310
; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
311311
; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT
312312
; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
313313
; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
314314
; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
315-
; CHECK-AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
315+
; CHECK-AVX2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
316316
; CHECK-AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero
317317
; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT
318318
; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
319319
; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
320320
; CHECK-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
321321
; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
322322
; CHECK-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
323-
; CHECK-AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
323+
; CHECK-AVX2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
324324
; CHECK-AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero
325325
; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT
326326
; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
327327
; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
328328
; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
329-
; CHECK-AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
329+
; CHECK-AVX2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
330330
; CHECK-AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero
331331
; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT
332332
; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -336,27 +336,27 @@ define <8 x half> @fmul_pow2_8xhalf(<8 x i16> %i) {
336336
; CHECK-AVX2-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
337337
; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
338338
; CHECK-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
339-
; CHECK-AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
339+
; CHECK-AVX2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
340340
; CHECK-AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero
341341
; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT
342342
; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
343343
; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
344344
; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
345-
; CHECK-AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
345+
; CHECK-AVX2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
346346
; CHECK-AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero
347347
; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT
348348
; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
349349
; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
350350
; CHECK-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
351351
; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
352352
; CHECK-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
353-
; CHECK-AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
353+
; CHECK-AVX2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
354354
; CHECK-AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero
355355
; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT
356356
; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
357357
; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
358358
; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
359-
; CHECK-AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
359+
; CHECK-AVX2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
360360
; CHECK-AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero
361361
; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT
362362
; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -1111,7 +1111,7 @@ define <2 x half> @fmul_pow_shl_cnt_vec_fail_to_large(<2 x i16> %cnt) nounwind {
11111111
; CHECK-AVX2-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
11121112
; CHECK-AVX2-NEXT: vzeroupper
11131113
; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
1114-
; CHECK-AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1114+
; CHECK-AVX2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
11151115
; CHECK-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
11161116
; CHECK-AVX2-NEXT: vpextrw $0, %xmm0, %eax
11171117
; CHECK-AVX2-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
@@ -1121,7 +1121,7 @@ define <2 x half> @fmul_pow_shl_cnt_vec_fail_to_large(<2 x i16> %cnt) nounwind {
11211121
; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
11221122
; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
11231123
; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1124-
; CHECK-AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1124+
; CHECK-AVX2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
11251125
; CHECK-AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero
11261126
; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT
11271127
; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=x86_64-unknown -start-before=twoaddressinstruction -stop-after=postrapseudos -verify-machineinstrs -o - %s | FileCheck %s
3+
4+
...
5+
---
6+
name: test
7+
alignment: 16
8+
tracksRegLiveness: true
9+
debugInstrRef: true
10+
registers:
11+
liveins:
12+
- { reg: '$xmm0', virtual-reg: '%0' }
13+
frameInfo:
14+
maxAlignment: 1
15+
hasCalls: true
16+
machineFunctionInfo: {}
17+
body: |
18+
bb.0:
19+
liveins: $xmm0
20+
21+
; CHECK-LABEL: name: test
22+
; CHECK: liveins: $xmm0
23+
; CHECK-NEXT: {{ $}}
24+
; CHECK-NEXT: MOVSSmr $rsp, 1, $noreg, -4, $noreg, $xmm0 :: (store (s32) into %stack.0, align 2)
25+
; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $xmm0, 12 /* clobber */, implicit-def dead early-clobber $xmm1, 12 /* clobber */, implicit-def dead early-clobber $xmm2, 12 /* clobber */, implicit-def dead early-clobber $xmm3, 12 /* clobber */, implicit-def dead early-clobber $xmm4, 12 /* clobber */, implicit-def dead early-clobber $xmm5, 12 /* clobber */, implicit-def dead early-clobber $xmm6, 12 /* clobber */, implicit-def dead early-clobber $xmm7, 12 /* clobber */, implicit-def dead early-clobber $xmm8, 12 /* clobber */, implicit-def dead early-clobber $xmm9, 12 /* clobber */, implicit-def dead early-clobber $xmm10, 12 /* clobber */, implicit-def dead early-clobber $xmm11, 12 /* clobber */, implicit-def dead early-clobber $xmm12, 12 /* clobber */, implicit-def dead early-clobber $xmm13, 12 /* clobber */, implicit-def dead early-clobber $xmm14, 12 /* clobber */, implicit-def dead early-clobber $xmm15, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags
26+
; CHECK-NEXT: renamable $xmm0 = MOVSSrm $rsp, 1, $noreg, -4, $noreg :: (load (s32) from %stack.0, align 2)
27+
; CHECK-NEXT: FNOP implicit-def $fpsw, implicit killed renamable $xmm0
28+
; CHECK-NEXT: RET 0
29+
%0:fr16 = COPY killed $xmm0
30+
INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $xmm0, 12 /* clobber */, implicit-def dead early-clobber $xmm1, 12 /* clobber */, implicit-def dead early-clobber $xmm2, 12 /* clobber */, implicit-def dead early-clobber $xmm3, 12 /* clobber */, implicit-def dead early-clobber $xmm4, 12 /* clobber */, implicit-def dead early-clobber $xmm5, 12 /* clobber */, implicit-def dead early-clobber $xmm6, 12 /* clobber */, implicit-def dead early-clobber $xmm7, 12 /* clobber */, implicit-def dead early-clobber $xmm8, 12 /* clobber */, implicit-def dead early-clobber $xmm9, 12 /* clobber */, implicit-def dead early-clobber $xmm10, 12 /* clobber */, implicit-def dead early-clobber $xmm11, 12 /* clobber */, implicit-def dead early-clobber $xmm12, 12 /* clobber */, implicit-def dead early-clobber $xmm13, 12 /* clobber */, implicit-def dead early-clobber $xmm14, 12 /* clobber */, implicit-def dead early-clobber $xmm15, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags
31+
FNOP implicit-def $fpsw, implicit %0:fr16
32+
RET 0
33+
34+
...
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefixes=SSE2
3+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -verify-machineinstrs | FileCheck %s --check-prefixes=AVX
4+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -verify-machineinstrs | FileCheck %s --check-prefixes=AVX512
5+
6+
define half @test(float %f, ptr %p) nounwind {
7+
; SSE2-LABEL: test:
8+
; SSE2: # %bb.0:
9+
; SSE2-NEXT: pushq %rbx
10+
; SSE2-NEXT: subq $16, %rsp
11+
; SSE2-NEXT: movq %rdi, %rbx
12+
; SSE2-NEXT: callq __truncsfhf2@PLT
13+
; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
14+
; SSE2-NEXT: callq __extendhfsf2@PLT
15+
; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
16+
; SSE2-NEXT: #APP
17+
; SSE2-NEXT: #NO_APP
18+
; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
19+
; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
20+
; SSE2-NEXT: movss %xmm0, (%rbx)
21+
; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
22+
; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
23+
; SSE2-NEXT: addq $16, %rsp
24+
; SSE2-NEXT: popq %rbx
25+
; SSE2-NEXT: retq
26+
;
27+
; AVX-LABEL: test:
28+
; AVX: # %bb.0:
29+
; AVX-NEXT: pushq %rbx
30+
; AVX-NEXT: subq $16, %rsp
31+
; AVX-NEXT: movq %rdi, %rbx
32+
; AVX-NEXT: callq __truncsfhf2@PLT
33+
; AVX-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
34+
; AVX-NEXT: callq __extendhfsf2@PLT
35+
; AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
36+
; AVX-NEXT: #APP
37+
; AVX-NEXT: #NO_APP
38+
; AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
39+
; AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
40+
; AVX-NEXT: vmovss %xmm0, (%rbx)
41+
; AVX-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
42+
; AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
43+
; AVX-NEXT: addq $16, %rsp
44+
; AVX-NEXT: popq %rbx
45+
; AVX-NEXT: retq
46+
;
47+
; AVX512-LABEL: test:
48+
; AVX512: # %bb.0:
49+
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
50+
; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
51+
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
52+
; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
53+
; AVX512-NEXT: #APP
54+
; AVX512-NEXT: #NO_APP
55+
; AVX512-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
56+
; AVX512-NEXT: vmovss %xmm0, (%rdi)
57+
; AVX512-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
58+
; AVX512-NEXT: retq
59+
%t = fptrunc float %f to half
60+
%t2 = fpext half %t to float
61+
tail call void asm sideeffect "", "~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{dirflag},~{fpsr},~{flags}"()
62+
store float %t2, ptr %p
63+
ret half %t
64+
}

llvm/test/CodeGen/X86/frem.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@ define void @frem_f16(half %a0, half %a1, ptr%p3) nounwind {
99
; CHECK-NEXT: pushq %rbx
1010
; CHECK-NEXT: subq $16, %rsp
1111
; CHECK-NEXT: movq %rdi, %rbx
12-
; CHECK-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
12+
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1313
; CHECK-NEXT: vmovaps %xmm1, %xmm0
1414
; CHECK-NEXT: callq __extendhfsf2@PLT
1515
; CHECK-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
16-
; CHECK-NEXT: vmovd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
16+
; CHECK-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
1717
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
1818
; CHECK-NEXT: callq __extendhfsf2@PLT
1919
; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload

0 commit comments

Comments
 (0)