Skip to content

Commit 7f9d72a

Browse files
authored
[AArch64] Lower FPR register moves to zero cycle NEON (#153158)
[AArch64] Lower FPR register moves to zero cycle NEON Lower FPR64, FPR32, FPR16, FPR8 register moves into NEON moves if the target supports zero cycle move for NEON but not for the narrower classes. Adds a subtarget feature called FeatureZCRegMoveFPR128 that makes it possible to query whether the target supports zero cycle reg move for FPR128 NEON registers, and embeds it into the appropriate processors. Includes lowering test cases, and specializes check prefixes.
1 parent 8843bc5 commit 7f9d72a

File tree

4 files changed

+191
-53
lines changed

4 files changed

+191
-53
lines changed

llvm/lib/Target/AArch64/AArch64Features.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -621,6 +621,9 @@ def FeatureZCRegMoveGPR64 : SubtargetFeature<"zcm-gpr64", "HasZeroCycleRegMoveGP
621621
def FeatureZCRegMoveGPR32 : SubtargetFeature<"zcm-gpr32", "HasZeroCycleRegMoveGPR32", "true",
622622
"Has zero-cycle register moves for GPR32 registers">;
623623

624+
def FeatureZCRegMoveFPR128 : SubtargetFeature<"zcm-fpr128", "HasZeroCycleRegMoveFPR128", "true",
625+
"Has zero-cycle register moves for FPR128 registers">;
626+
624627
def FeatureZCRegMoveFPR64 : SubtargetFeature<"zcm-fpr64", "HasZeroCycleRegMoveFPR64", "true",
625628
"Has zero-cycle register moves for FPR64 registers">;
626629

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 74 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5318,15 +5318,49 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
53185318

53195319
if (AArch64::FPR64RegClass.contains(DestReg) &&
53205320
AArch64::FPR64RegClass.contains(SrcReg)) {
5321-
BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
5322-
.addReg(SrcReg, getKillRegState(KillSrc));
5321+
if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5322+
!Subtarget.hasZeroCycleRegMoveFPR64() &&
5323+
!Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5324+
const TargetRegisterInfo *TRI = &getRegisterInfo();
5325+
MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::dsub,
5326+
&AArch64::FPR128RegClass);
5327+
MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::dsub,
5328+
&AArch64::FPR128RegClass);
5329+
// This instruction is reading and writing Q registers. This may upset
5330+
// the register scavenger and machine verifier, so we need to indicate
5331+
// that we are reading an undefined value from SrcRegQ, but a proper
5332+
// value from SrcReg.
5333+
BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5334+
.addReg(SrcRegQ, RegState::Undef)
5335+
.addReg(SrcRegQ, RegState::Undef)
5336+
.addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5337+
} else {
5338+
BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
5339+
.addReg(SrcReg, getKillRegState(KillSrc));
5340+
}
53235341
return;
53245342
}
53255343

53265344
if (AArch64::FPR32RegClass.contains(DestReg) &&
53275345
AArch64::FPR32RegClass.contains(SrcReg)) {
5328-
if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5329-
!Subtarget.hasZeroCycleRegMoveFPR32()) {
5346+
if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5347+
!Subtarget.hasZeroCycleRegMoveFPR64() &&
5348+
!Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5349+
const TargetRegisterInfo *TRI = &getRegisterInfo();
5350+
MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::ssub,
5351+
&AArch64::FPR128RegClass);
5352+
MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::ssub,
5353+
&AArch64::FPR128RegClass);
5354+
// This instruction is reading and writing Q registers. This may upset
5355+
// the register scavenger and machine verifier, so we need to indicate
5356+
// that we are reading an undefined value from SrcRegQ, but a proper
5357+
// value from SrcReg.
5358+
BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5359+
.addReg(SrcRegQ, RegState::Undef)
5360+
.addReg(SrcRegQ, RegState::Undef)
5361+
.addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5362+
} else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5363+
!Subtarget.hasZeroCycleRegMoveFPR32()) {
53305364
const TargetRegisterInfo *TRI = &getRegisterInfo();
53315365
MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::ssub,
53325366
&AArch64::FPR64RegClass);
@@ -5348,8 +5382,24 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
53485382

53495383
if (AArch64::FPR16RegClass.contains(DestReg) &&
53505384
AArch64::FPR16RegClass.contains(SrcReg)) {
5351-
if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5352-
!Subtarget.hasZeroCycleRegMoveFPR32()) {
5385+
if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5386+
!Subtarget.hasZeroCycleRegMoveFPR64() &&
5387+
!Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5388+
const TargetRegisterInfo *TRI = &getRegisterInfo();
5389+
MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::hsub,
5390+
&AArch64::FPR128RegClass);
5391+
MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::hsub,
5392+
&AArch64::FPR128RegClass);
5393+
// This instruction is reading and writing Q registers. This may upset
5394+
// the register scavenger and machine verifier, so we need to indicate
5395+
// that we are reading an undefined value from SrcRegQ, but a proper
5396+
// value from SrcReg.
5397+
BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5398+
.addReg(SrcRegQ, RegState::Undef)
5399+
.addReg(SrcRegQ, RegState::Undef)
5400+
.addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5401+
} else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5402+
!Subtarget.hasZeroCycleRegMoveFPR32()) {
53535403
const TargetRegisterInfo *TRI = &getRegisterInfo();
53545404
MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::hsub,
53555405
&AArch64::FPR64RegClass);
@@ -5375,8 +5425,24 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
53755425

53765426
if (AArch64::FPR8RegClass.contains(DestReg) &&
53775427
AArch64::FPR8RegClass.contains(SrcReg)) {
5378-
if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5379-
!Subtarget.hasZeroCycleRegMoveFPR32()) {
5428+
if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5429+
!Subtarget.hasZeroCycleRegMoveFPR64() &&
5430+
!Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5431+
const TargetRegisterInfo *TRI = &getRegisterInfo();
5432+
MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::bsub,
5433+
&AArch64::FPR128RegClass);
5434+
MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::bsub,
5435+
&AArch64::FPR128RegClass);
5436+
// This instruction is reading and writing Q registers. This may upset
5437+
// the register scavenger and machine verifier, so we need to indicate
5438+
// that we are reading an undefined value from SrcRegQ, but a proper
5439+
// value from SrcReg.
5440+
BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5441+
.addReg(SrcRegQ, RegState::Undef)
5442+
.addReg(SrcRegQ, RegState::Undef)
5443+
.addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5444+
} else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5445+
!Subtarget.hasZeroCycleRegMoveFPR32()) {
53805446
const TargetRegisterInfo *TRI = &getRegisterInfo();
53815447
MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::bsub,
53825448
&AArch64::FPR64RegClass);

llvm/lib/Target/AArch64/AArch64Processors.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,7 @@ def TuneAppleA7 : SubtargetFeature<"apple-a7", "ARMProcFamily", "AppleA7",
321321
FeatureFuseAES, FeatureFuseCryptoEOR,
322322
FeatureStorePairSuppress,
323323
FeatureZCRegMoveGPR64,
324+
FeatureZCRegMoveFPR128,
324325
FeatureZCZeroing,
325326
FeatureZCZeroingFPWorkaround]>;
326327

@@ -334,6 +335,7 @@ def TuneAppleA10 : SubtargetFeature<"apple-a10", "ARMProcFamily", "AppleA10",
334335
FeatureFuseCryptoEOR,
335336
FeatureStorePairSuppress,
336337
FeatureZCRegMoveGPR64,
338+
FeatureZCRegMoveFPR128,
337339
FeatureZCZeroing]>;
338340

339341
def TuneAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11",
@@ -346,6 +348,7 @@ def TuneAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11",
346348
FeatureFuseCryptoEOR,
347349
FeatureStorePairSuppress,
348350
FeatureZCRegMoveGPR64,
351+
FeatureZCRegMoveFPR128,
349352
FeatureZCZeroing]>;
350353

351354
def TuneAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12",
@@ -358,6 +361,7 @@ def TuneAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12",
358361
FeatureFuseCryptoEOR,
359362
FeatureStorePairSuppress,
360363
FeatureZCRegMoveGPR64,
364+
FeatureZCRegMoveFPR128,
361365
FeatureZCZeroing]>;
362366

363367
def TuneAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13",
@@ -370,6 +374,7 @@ def TuneAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13",
370374
FeatureFuseCryptoEOR,
371375
FeatureStorePairSuppress,
372376
FeatureZCRegMoveGPR64,
377+
FeatureZCRegMoveFPR128,
373378
FeatureZCZeroing]>;
374379

375380
def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",
@@ -387,6 +392,7 @@ def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",
387392
FeatureFuseLiterals,
388393
FeatureStorePairSuppress,
389394
FeatureZCRegMoveGPR64,
395+
FeatureZCRegMoveFPR128,
390396
FeatureZCZeroing]>;
391397

392398
def TuneAppleA15 : SubtargetFeature<"apple-a15", "ARMProcFamily", "AppleA15",
@@ -404,6 +410,7 @@ def TuneAppleA15 : SubtargetFeature<"apple-a15", "ARMProcFamily", "AppleA15",
404410
FeatureFuseLiterals,
405411
FeatureStorePairSuppress,
406412
FeatureZCRegMoveGPR64,
413+
FeatureZCRegMoveFPR128,
407414
FeatureZCZeroing]>;
408415

409416
def TuneAppleA16 : SubtargetFeature<"apple-a16", "ARMProcFamily", "AppleA16",
@@ -421,6 +428,7 @@ def TuneAppleA16 : SubtargetFeature<"apple-a16", "ARMProcFamily", "AppleA16",
421428
FeatureFuseLiterals,
422429
FeatureStorePairSuppress,
423430
FeatureZCRegMoveGPR64,
431+
FeatureZCRegMoveFPR128,
424432
FeatureZCZeroing]>;
425433

426434
def TuneAppleA17 : SubtargetFeature<"apple-a17", "ARMProcFamily", "AppleA17",
@@ -438,6 +446,7 @@ def TuneAppleA17 : SubtargetFeature<"apple-a17", "ARMProcFamily", "AppleA17",
438446
FeatureFuseLiterals,
439447
FeatureStorePairSuppress,
440448
FeatureZCRegMoveGPR64,
449+
FeatureZCRegMoveFPR128,
441450
FeatureZCZeroing]>;
442451

443452
def TuneAppleM4 : SubtargetFeature<"apple-m4", "ARMProcFamily", "AppleM4",
@@ -454,6 +463,7 @@ def TuneAppleM4 : SubtargetFeature<"apple-m4", "ARMProcFamily", "AppleM4",
454463
FeatureFuseCryptoEOR,
455464
FeatureFuseLiterals,
456465
FeatureZCRegMoveGPR64,
466+
FeatureZCRegMoveFPR128,
457467
FeatureZCZeroing
458468
]>;
459469

llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov-fpr.ll

Lines changed: 104 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,84 @@
1-
; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s -check-prefixes=NOTCPU-LINUX --match-full-lines
2-
; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=generic | FileCheck %s -check-prefixes=NOTCPU-APPLE --match-full-lines
3-
; RUN: llc < %s -mtriple=arm64-apple-macosx -mattr=+zcm-fpr64 | FileCheck %s -check-prefixes=ATTR --match-full-lines
1+
; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s -check-prefixes=NOZCM-FPR128-CPU --match-full-lines
2+
; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=generic | FileCheck %s -check-prefixes=NOZCM-FPR128-CPU --match-full-lines
3+
; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=apple-m1 | FileCheck %s -check-prefixes=ZCM-FPR128-CPU --match-full-lines
4+
; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=apple-m1 -mattr=-zcm-fpr128 | FileCheck %s -check-prefixes=NOZCM-FPR128-ATTR --match-full-lines
5+
; RUN: llc < %s -mtriple=arm64-apple-macosx -mattr=+zcm-fpr128 | FileCheck %s -check-prefixes=ZCM-FPR128-ATTR --match-full-lines
6+
7+
define void @zero_cycle_regmov_FPR64(double %a, double %b, double %c, double %d) {
8+
entry:
9+
; CHECK-LABEL: t:
10+
; NOZCM-FPR128-CPU: fmov d0, d2
11+
; NOZCM-FPR128-CPU: fmov d1, d3
12+
; NOZCM-FPR128-CPU: fmov [[REG2:d[0-9]+]], d3
13+
; NOZCM-FPR128-CPU: fmov [[REG1:d[0-9]+]], d2
14+
; NOZCM-FPR128-CPU-NEXT: bl {{_?foo_double}}
15+
; NOZCM-FPR128-CPU: fmov d0, [[REG1]]
16+
; NOZCM-FPR128-CPU: fmov d1, [[REG2]]
17+
18+
; ZCM-FPR128-CPU: mov.16b [[REG2:v[0-9]+]], v3
19+
; ZCM-FPR128-CPU: mov.16b [[REG1:v[0-9]+]], v2
20+
; ZCM-FPR128-CPU: mov.16b v0, v2
21+
; ZCM-FPR128-CPU: mov.16b v1, v3
22+
; ZCM-FPR128-CPU-NEXT: bl {{_?foo_double}}
23+
; ZCM-FPR128-CPU: mov.16b v0, [[REG1]]
24+
; ZCM-FPR128-CPU: mov.16b v1, [[REG2]]
25+
26+
; NOZCM-FPR128-ATTR: fmov [[REG2:d[0-9]+]], d3
27+
; NOZCM-FPR128-ATTR: fmov [[REG1:d[0-9]+]], d2
28+
; NOZCM-FPR128-ATTR: fmov d0, d2
29+
; NOZCM-FPR128-ATTR: fmov d1, d3
30+
; NOZCM-FPR128-ATTR-NEXT: bl {{_?foo_double}}
31+
; NOZCM-FPR128-ATTR: fmov d0, [[REG1]]
32+
; NOZCM-FPR128-ATTR: fmov d1, [[REG2]]
33+
34+
; ZCM-FPR128-ATTR: mov.16b v0, v2
35+
; ZCM-FPR128-ATTR: mov.16b v1, v3
36+
; ZCM-FPR128-ATTR: mov.16b [[REG2:v[0-9]+]], v3
37+
; ZCM-FPR128-ATTR: mov.16b [[REG1:v[0-9]+]], v2
38+
; ZCM-FPR128-ATTR-NEXT: bl {{_?foo_double}}
39+
; ZCM-FPR128-ATTR: mov.16b v0, [[REG1]]
40+
; ZCM-FPR128-ATTR: mov.16b v1, [[REG2]]
41+
%call = call double @foo_double(double %c, double %d)
42+
%call1 = call double @foo_double(double %c, double %d)
43+
unreachable
44+
}
45+
46+
declare double @foo_double(double, double)
447

548
define void @zero_cycle_regmov_FPR32(float %a, float %b, float %c, float %d) {
649
entry:
750
; CHECK-LABEL: t:
8-
; NOTCPU-LINUX: fmov s0, s2
9-
; NOTCPU-LINUX: fmov s1, s3
10-
; NOTCPU-LINUX: fmov [[REG2:s[0-9]+]], s3
11-
; NOTCPU-LINUX: fmov [[REG1:s[0-9]+]], s2
12-
; NOTCPU-LINUX-NEXT: bl {{_?foo_float}}
13-
; NOTCPU-LINUX: fmov s0, [[REG1]]
14-
; NOTCPU-LINUX: fmov s1, [[REG2]]
51+
; NOZCM-FPR128-CPU: fmov s0, s2
52+
; NOZCM-FPR128-CPU: fmov s1, s3
53+
; NOZCM-FPR128-CPU: fmov [[REG2:s[0-9]+]], s3
54+
; NOZCM-FPR128-CPU: fmov [[REG1:s[0-9]+]], s2
55+
; NOZCM-FPR128-CPU-NEXT: bl {{_?foo_float}}
56+
; NOZCM-FPR128-CPU: fmov s0, [[REG1]]
57+
; NOZCM-FPR128-CPU: fmov s1, [[REG2]]
1558

16-
; NOTCPU-APPLE: fmov s0, s2
17-
; NOTCPU-APPLE: fmov s1, s3
18-
; NOTCPU-APPLE: fmov [[REG2:s[0-9]+]], s3
19-
; NOTCPU-APPLE: fmov [[REG1:s[0-9]+]], s2
20-
; NOTCPU-APPLE-NEXT: bl {{_?foo_float}}
21-
; NOTCPU-APPLE: fmov s0, [[REG1]]
22-
; NOTCPU-APPLE: fmov s1, [[REG2]]
59+
; ZCM-FPR128-CPU: mov.16b [[REG2:v[0-9]+]], v3
60+
; ZCM-FPR128-CPU: mov.16b [[REG1:v[0-9]+]], v2
61+
; ZCM-FPR128-CPU: mov.16b v0, v2
62+
; ZCM-FPR128-CPU: mov.16b v1, v3
63+
; ZCM-FPR128-CPU-NEXT: bl {{_?foo_float}}
64+
; ZCM-FPR128-CPU: mov.16b v0, [[REG1]]
65+
; ZCM-FPR128-CPU: mov.16b v1, [[REG2]]
2366

24-
; ATTR: fmov d0, d2
25-
; ATTR: fmov d1, d3
26-
; ATTR: fmov [[REG2:d[0-9]+]], d3
27-
; ATTR: fmov [[REG1:d[0-9]+]], d2
28-
; ATTR-NEXT: bl {{_?foo_float}}
29-
; ATTR: fmov d0, [[REG1]]
30-
; ATTR: fmov d1, [[REG2]]
67+
; NOZCM-FPR128-ATTR: fmov [[REG2:s[0-9]+]], s3
68+
; NOZCM-FPR128-ATTR: fmov [[REG1:s[0-9]+]], s2
69+
; NOZCM-FPR128-ATTR: fmov s0, s2
70+
; NOZCM-FPR128-ATTR: fmov s1, s3
71+
; NOZCM-FPR128-ATTR-NEXT: bl {{_?foo_float}}
72+
; NOZCM-FPR128-ATTR: fmov s0, [[REG1]]
73+
; NOZCM-FPR128-ATTR: fmov s1, [[REG2]]
74+
75+
; ZCM-FPR128-ATTR: mov.16b v0, v2
76+
; ZCM-FPR128-ATTR: mov.16b v1, v3
77+
; ZCM-FPR128-ATTR: mov.16b [[REG2:v[0-9]+]], v3
78+
; ZCM-FPR128-ATTR: mov.16b [[REG1:v[0-9]+]], v2
79+
; ZCM-FPR128-ATTR-NEXT: bl {{_?foo_float}}
80+
; ZCM-FPR128-ATTR: mov.16b v0, [[REG1]]
81+
; ZCM-FPR128-ATTR: mov.16b v1, [[REG2]]
3182
%call = call float @foo_float(float %c, float %d)
3283
%call1 = call float @foo_float(float %c, float %d)
3384
unreachable
@@ -38,29 +89,37 @@ declare float @foo_float(float, float)
3889
define void @zero_cycle_regmov_FPR16(half %a, half %b, half %c, half %d) {
3990
entry:
4091
; CHECK-LABEL: t:
41-
; NOTCPU-LINUX: fmov s0, s2
42-
; NOTCPU-LINUX: fmov s1, s3
43-
; NOTCPU-LINUX: fmov [[REG2:s[0-9]+]], s3
44-
; NOTCPU-LINUX: fmov [[REG1:s[0-9]+]], s2
45-
; NOTCPU-LINUX-NEXT: bl {{_?foo_half}}
46-
; NOTCPU-LINUX: fmov s0, [[REG1]]
47-
; NOTCPU-LINUX: fmov s1, [[REG2]]
92+
; NOZCM-FPR128-CPU: fmov s0, s2
93+
; NOZCM-FPR128-CPU: fmov s1, s3
94+
; NOZCM-FPR128-CPU: fmov [[REG2:s[0-9]+]], s3
95+
; NOZCM-FPR128-CPU: fmov [[REG1:s[0-9]+]], s2
96+
; NOZCM-FPR128-CPU-NEXT: bl {{_?foo_half}}
97+
; NOZCM-FPR128-CPU: fmov s0, [[REG1]]
98+
; NOZCM-FPR128-CPU: fmov s1, [[REG2]]
99+
100+
; ZCM-FPR128-CPU: mov.16b [[REG2:v[0-9]+]], v3
101+
; ZCM-FPR128-CPU: mov.16b [[REG1:v[0-9]+]], v2
102+
; ZCM-FPR128-CPU: mov.16b v0, v2
103+
; ZCM-FPR128-CPU: mov.16b v1, v3
104+
; ZCM-FPR128-CPU-NEXT: bl {{_?foo_half}}
105+
; ZCM-FPR128-CPU: mov.16b v0, [[REG1]]
106+
; ZCM-FPR128-CPU: mov.16b v1, [[REG2]]
48107

49-
; NOTCPU-APPLE: fmov s0, s2
50-
; NOTCPU-APPLE: fmov s1, s3
51-
; NOTCPU-APPLE: fmov [[REG2:s[0-9]+]], s3
52-
; NOTCPU-APPLE: fmov [[REG1:s[0-9]+]], s2
53-
; NOTCPU-APPLE-NEXT: bl {{_?foo_half}}
54-
; NOTCPU-APPLE: fmov s0, [[REG1]]
55-
; NOTCPU-APPLE: fmov s1, [[REG2]]
108+
; NOZCM-FPR128-ATTR: fmov [[REG2:s[0-9]+]], s3
109+
; NOZCM-FPR128-ATTR: fmov [[REG1:s[0-9]+]], s2
110+
; NOZCM-FPR128-ATTR: fmov s0, s2
111+
; NOZCM-FPR128-ATTR: fmov s1, s3
112+
; NOZCM-FPR128-ATTR-NEXT: bl {{_?foo_half}}
113+
; NOZCM-FPR128-ATTR: fmov s0, [[REG1]]
114+
; NOZCM-FPR128-ATTR: fmov s1, [[REG2]]
56115

57-
; ATTR: fmov d0, d2
58-
; ATTR: fmov d1, d3
59-
; ATTR: fmov [[REG2:d[0-9]+]], d3
60-
; ATTR: fmov [[REG1:d[0-9]+]], d2
61-
; ATTR-NEXT: bl {{_?foo_half}}
62-
; ATTR: fmov d0, [[REG1]]
63-
; ATTR: fmov d1, [[REG2]]
116+
; ZCM-FPR128-ATTR: mov.16b v0, v2
117+
; ZCM-FPR128-ATTR: mov.16b v1, v3
118+
; ZCM-FPR128-ATTR: mov.16b [[REG2:v[0-9]+]], v3
119+
; ZCM-FPR128-ATTR: mov.16b [[REG1:v[0-9]+]], v2
120+
; ZCM-FPR128-ATTR-NEXT: bl {{_?foo_half}}
121+
; ZCM-FPR128-ATTR: mov.16b v0, [[REG1]]
122+
; ZCM-FPR128-ATTR: mov.16b v1, [[REG2]]
64123
%call = call half @foo_half(half %c, half %d)
65124
%call1 = call half @foo_half(half %c, half %d)
66125
unreachable

0 commit comments

Comments
 (0)