Skip to content

Commit 92334f1

Browse files
committed
[AArch64] Lower FPR register moves to zero cycle NEON (#153158)
[AArch64] Lower FPR register moves to zero cycle NEON Lower FPR64, FPR32, FPR16, FPR8 register moves into NEON moves if the target supports zero cycle move for NEON but not for the narrower classes. Adds a subtarget feature called FeatureZCRegMoveFPR128 that makes it possible to query whether the target supports zero cycle reg move for FPR128 NEON registers, and embeds it into the appropriate processors. Includes lowering test cases, and specializes check prefixes. (cherry-pick 7f9d72a)
1 parent e4e279c commit 92334f1

File tree

4 files changed

+191
-53
lines changed

4 files changed

+191
-53
lines changed

llvm/lib/Target/AArch64/AArch64Features.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -621,6 +621,9 @@ def FeatureZCRegMoveGPR64 : SubtargetFeature<"zcm-gpr64", "HasZeroCycleRegMoveGP
621621
def FeatureZCRegMoveGPR32 : SubtargetFeature<"zcm-gpr32", "HasZeroCycleRegMoveGPR32", "true",
622622
"Has zero-cycle register moves for GPR32 registers">;
623623

624+
def FeatureZCRegMoveFPR128 : SubtargetFeature<"zcm-fpr128", "HasZeroCycleRegMoveFPR128", "true",
625+
"Has zero-cycle register moves for FPR128 registers">;
626+
624627
def FeatureZCRegMoveFPR64 : SubtargetFeature<"zcm-fpr64", "HasZeroCycleRegMoveFPR64", "true",
625628
"Has zero-cycle register moves for FPR64 registers">;
626629

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 74 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5307,15 +5307,49 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
53075307

53085308
if (AArch64::FPR64RegClass.contains(DestReg) &&
53095309
AArch64::FPR64RegClass.contains(SrcReg)) {
5310-
BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
5311-
.addReg(SrcReg, getKillRegState(KillSrc));
5310+
if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5311+
!Subtarget.hasZeroCycleRegMoveFPR64() &&
5312+
!Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5313+
const TargetRegisterInfo *TRI = &getRegisterInfo();
5314+
MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::dsub,
5315+
&AArch64::FPR128RegClass);
5316+
MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::dsub,
5317+
&AArch64::FPR128RegClass);
5318+
// This instruction is reading and writing Q registers. This may upset
5319+
// the register scavenger and machine verifier, so we need to indicate
5320+
// that we are reading an undefined value from SrcRegQ, but a proper
5321+
// value from SrcReg.
5322+
BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5323+
.addReg(SrcRegQ, RegState::Undef)
5324+
.addReg(SrcRegQ, RegState::Undef)
5325+
.addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5326+
} else {
5327+
BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
5328+
.addReg(SrcReg, getKillRegState(KillSrc));
5329+
}
53125330
return;
53135331
}
53145332

53155333
if (AArch64::FPR32RegClass.contains(DestReg) &&
53165334
AArch64::FPR32RegClass.contains(SrcReg)) {
5317-
if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5318-
!Subtarget.hasZeroCycleRegMoveFPR32()) {
5335+
if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5336+
!Subtarget.hasZeroCycleRegMoveFPR64() &&
5337+
!Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5338+
const TargetRegisterInfo *TRI = &getRegisterInfo();
5339+
MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::ssub,
5340+
&AArch64::FPR128RegClass);
5341+
MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::ssub,
5342+
&AArch64::FPR128RegClass);
5343+
// This instruction is reading and writing Q registers. This may upset
5344+
// the register scavenger and machine verifier, so we need to indicate
5345+
// that we are reading an undefined value from SrcRegQ, but a proper
5346+
// value from SrcReg.
5347+
BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5348+
.addReg(SrcRegQ, RegState::Undef)
5349+
.addReg(SrcRegQ, RegState::Undef)
5350+
.addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5351+
} else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5352+
!Subtarget.hasZeroCycleRegMoveFPR32()) {
53195353
const TargetRegisterInfo *TRI = &getRegisterInfo();
53205354
MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::ssub,
53215355
&AArch64::FPR64RegClass);
@@ -5337,8 +5371,24 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
53375371

53385372
if (AArch64::FPR16RegClass.contains(DestReg) &&
53395373
AArch64::FPR16RegClass.contains(SrcReg)) {
5340-
if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5341-
!Subtarget.hasZeroCycleRegMoveFPR32()) {
5374+
if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5375+
!Subtarget.hasZeroCycleRegMoveFPR64() &&
5376+
!Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5377+
const TargetRegisterInfo *TRI = &getRegisterInfo();
5378+
MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::hsub,
5379+
&AArch64::FPR128RegClass);
5380+
MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::hsub,
5381+
&AArch64::FPR128RegClass);
5382+
// This instruction is reading and writing Q registers. This may upset
5383+
// the register scavenger and machine verifier, so we need to indicate
5384+
// that we are reading an undefined value from SrcRegQ, but a proper
5385+
// value from SrcReg.
5386+
BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5387+
.addReg(SrcRegQ, RegState::Undef)
5388+
.addReg(SrcRegQ, RegState::Undef)
5389+
.addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5390+
} else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5391+
!Subtarget.hasZeroCycleRegMoveFPR32()) {
53425392
const TargetRegisterInfo *TRI = &getRegisterInfo();
53435393
MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::hsub,
53445394
&AArch64::FPR64RegClass);
@@ -5364,8 +5414,24 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
53645414

53655415
if (AArch64::FPR8RegClass.contains(DestReg) &&
53665416
AArch64::FPR8RegClass.contains(SrcReg)) {
5367-
if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5368-
!Subtarget.hasZeroCycleRegMoveFPR32()) {
5417+
if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5418+
!Subtarget.hasZeroCycleRegMoveFPR64() &&
5419+
!Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5420+
const TargetRegisterInfo *TRI = &getRegisterInfo();
5421+
MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::bsub,
5422+
&AArch64::FPR128RegClass);
5423+
MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::bsub,
5424+
&AArch64::FPR128RegClass);
5425+
// This instruction is reading and writing Q registers. This may upset
5426+
// the register scavenger and machine verifier, so we need to indicate
5427+
// that we are reading an undefined value from SrcRegQ, but a proper
5428+
// value from SrcReg.
5429+
BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5430+
.addReg(SrcRegQ, RegState::Undef)
5431+
.addReg(SrcRegQ, RegState::Undef)
5432+
.addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5433+
} else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5434+
!Subtarget.hasZeroCycleRegMoveFPR32()) {
53695435
const TargetRegisterInfo *TRI = &getRegisterInfo();
53705436
MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::bsub,
53715437
&AArch64::FPR64RegClass);

llvm/lib/Target/AArch64/AArch64Processors.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,7 @@ def TuneAppleA7 : SubtargetFeature<"apple-a7", "ARMProcFamily", "AppleA7",
312312
FeatureFuseAES, FeatureFuseCryptoEOR,
313313
FeatureStorePairSuppress,
314314
FeatureZCRegMoveGPR64,
315+
FeatureZCRegMoveFPR128,
315316
FeatureZCZeroing,
316317
FeatureZCZeroingFPWorkaround]>;
317318

@@ -325,6 +326,7 @@ def TuneAppleA10 : SubtargetFeature<"apple-a10", "ARMProcFamily", "AppleA10",
325326
FeatureFuseCryptoEOR,
326327
FeatureStorePairSuppress,
327328
FeatureZCRegMoveGPR64,
329+
FeatureZCRegMoveFPR128,
328330
FeatureZCZeroing]>;
329331

330332
def TuneAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11",
@@ -337,6 +339,7 @@ def TuneAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11",
337339
FeatureFuseCryptoEOR,
338340
FeatureStorePairSuppress,
339341
FeatureZCRegMoveGPR64,
342+
FeatureZCRegMoveFPR128,
340343
FeatureZCZeroing]>;
341344

342345
def TuneAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12",
@@ -349,6 +352,7 @@ def TuneAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12",
349352
FeatureFuseCryptoEOR,
350353
FeatureStorePairSuppress,
351354
FeatureZCRegMoveGPR64,
355+
FeatureZCRegMoveFPR128,
352356
FeatureZCZeroing]>;
353357

354358
def TuneAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13",
@@ -361,6 +365,7 @@ def TuneAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13",
361365
FeatureFuseCryptoEOR,
362366
FeatureStorePairSuppress,
363367
FeatureZCRegMoveGPR64,
368+
FeatureZCRegMoveFPR128,
364369
FeatureZCZeroing]>;
365370

366371
def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",
@@ -378,6 +383,7 @@ def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",
378383
FeatureFuseLiterals,
379384
FeatureStorePairSuppress,
380385
FeatureZCRegMoveGPR64,
386+
FeatureZCRegMoveFPR128,
381387
FeatureZCZeroing]>;
382388

383389
def TuneAppleA15 : SubtargetFeature<"apple-a15", "ARMProcFamily", "AppleA15",
@@ -395,6 +401,7 @@ def TuneAppleA15 : SubtargetFeature<"apple-a15", "ARMProcFamily", "AppleA15",
395401
FeatureFuseLiterals,
396402
FeatureStorePairSuppress,
397403
FeatureZCRegMoveGPR64,
404+
FeatureZCRegMoveFPR128,
398405
FeatureZCZeroing]>;
399406

400407
def TuneAppleA16 : SubtargetFeature<"apple-a16", "ARMProcFamily", "AppleA16",
@@ -412,6 +419,7 @@ def TuneAppleA16 : SubtargetFeature<"apple-a16", "ARMProcFamily", "AppleA16",
412419
FeatureFuseLiterals,
413420
FeatureStorePairSuppress,
414421
FeatureZCRegMoveGPR64,
422+
FeatureZCRegMoveFPR128,
415423
FeatureZCZeroing]>;
416424

417425
def TuneAppleA17 : SubtargetFeature<"apple-a17", "ARMProcFamily", "AppleA17",
@@ -429,6 +437,7 @@ def TuneAppleA17 : SubtargetFeature<"apple-a17", "ARMProcFamily", "AppleA17",
429437
FeatureFuseLiterals,
430438
FeatureStorePairSuppress,
431439
FeatureZCRegMoveGPR64,
440+
FeatureZCRegMoveFPR128,
432441
FeatureZCZeroing]>;
433442

434443
def TuneAppleM4 : SubtargetFeature<"apple-m4", "ARMProcFamily", "AppleM4",
@@ -445,6 +454,7 @@ def TuneAppleM4 : SubtargetFeature<"apple-m4", "ARMProcFamily", "AppleM4",
445454
FeatureFuseCryptoEOR,
446455
FeatureFuseLiterals,
447456
FeatureZCRegMoveGPR64,
457+
FeatureZCRegMoveFPR128,
448458
FeatureZCZeroing
449459
]>;
450460

llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov-fpr.ll

Lines changed: 104 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,84 @@
1-
; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s -check-prefixes=NOTCPU-LINUX --match-full-lines
2-
; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=generic | FileCheck %s -check-prefixes=NOTCPU-APPLE --match-full-lines
3-
; RUN: llc < %s -mtriple=arm64-apple-macosx -mattr=+zcm-fpr64 | FileCheck %s -check-prefixes=ATTR --match-full-lines
1+
; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s -check-prefixes=NOZCM-FPR128-CPU --match-full-lines
2+
; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=generic | FileCheck %s -check-prefixes=NOZCM-FPR128-CPU --match-full-lines
3+
; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=apple-m1 | FileCheck %s -check-prefixes=ZCM-FPR128-CPU --match-full-lines
4+
; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=apple-m1 -mattr=-zcm-fpr128 | FileCheck %s -check-prefixes=NOZCM-FPR128-ATTR --match-full-lines
5+
; RUN: llc < %s -mtriple=arm64-apple-macosx -mattr=+zcm-fpr128 | FileCheck %s -check-prefixes=ZCM-FPR128-ATTR --match-full-lines
6+
7+
define void @zero_cycle_regmov_FPR64(double %a, double %b, double %c, double %d) {
8+
entry:
9+
; CHECK-LABEL: t:
10+
; NOZCM-FPR128-CPU: fmov d0, d2
11+
; NOZCM-FPR128-CPU: fmov d1, d3
12+
; NOZCM-FPR128-CPU: fmov [[REG2:d[0-9]+]], d3
13+
; NOZCM-FPR128-CPU: fmov [[REG1:d[0-9]+]], d2
14+
; NOZCM-FPR128-CPU-NEXT: bl {{_?foo_double}}
15+
; NOZCM-FPR128-CPU: fmov d0, [[REG1]]
16+
; NOZCM-FPR128-CPU: fmov d1, [[REG2]]
17+
18+
; ZCM-FPR128-CPU: mov.16b [[REG2:v[0-9]+]], v3
19+
; ZCM-FPR128-CPU: mov.16b [[REG1:v[0-9]+]], v2
20+
; ZCM-FPR128-CPU: mov.16b v0, v2
21+
; ZCM-FPR128-CPU: mov.16b v1, v3
22+
; ZCM-FPR128-CPU-NEXT: bl {{_?foo_double}}
23+
; ZCM-FPR128-CPU: mov.16b v0, [[REG1]]
24+
; ZCM-FPR128-CPU: mov.16b v1, [[REG2]]
25+
26+
; NOZCM-FPR128-ATTR: fmov [[REG2:d[0-9]+]], d3
27+
; NOZCM-FPR128-ATTR: fmov [[REG1:d[0-9]+]], d2
28+
; NOZCM-FPR128-ATTR: fmov d0, d2
29+
; NOZCM-FPR128-ATTR: fmov d1, d3
30+
; NOZCM-FPR128-ATTR-NEXT: bl {{_?foo_double}}
31+
; NOZCM-FPR128-ATTR: fmov d0, [[REG1]]
32+
; NOZCM-FPR128-ATTR: fmov d1, [[REG2]]
33+
34+
; ZCM-FPR128-ATTR: mov.16b v0, v2
35+
; ZCM-FPR128-ATTR: mov.16b v1, v3
36+
; ZCM-FPR128-ATTR: mov.16b [[REG2:v[0-9]+]], v3
37+
; ZCM-FPR128-ATTR: mov.16b [[REG1:v[0-9]+]], v2
38+
; ZCM-FPR128-ATTR-NEXT: bl {{_?foo_double}}
39+
; ZCM-FPR128-ATTR: mov.16b v0, [[REG1]]
40+
; ZCM-FPR128-ATTR: mov.16b v1, [[REG2]]
41+
%call = call double @foo_double(double %c, double %d)
42+
%call1 = call double @foo_double(double %c, double %d)
43+
unreachable
44+
}
45+
46+
declare double @foo_double(double, double)
447

548
define void @zero_cycle_regmov_FPR32(float %a, float %b, float %c, float %d) {
649
entry:
750
; CHECK-LABEL: t:
8-
; NOTCPU-LINUX: fmov s0, s2
9-
; NOTCPU-LINUX: fmov s1, s3
10-
; NOTCPU-LINUX: fmov [[REG2:s[0-9]+]], s3
11-
; NOTCPU-LINUX: fmov [[REG1:s[0-9]+]], s2
12-
; NOTCPU-LINUX-NEXT: bl {{_?foo_float}}
13-
; NOTCPU-LINUX: fmov s0, [[REG1]]
14-
; NOTCPU-LINUX: fmov s1, [[REG2]]
51+
; NOZCM-FPR128-CPU: fmov s0, s2
52+
; NOZCM-FPR128-CPU: fmov s1, s3
53+
; NOZCM-FPR128-CPU: fmov [[REG2:s[0-9]+]], s3
54+
; NOZCM-FPR128-CPU: fmov [[REG1:s[0-9]+]], s2
55+
; NOZCM-FPR128-CPU-NEXT: bl {{_?foo_float}}
56+
; NOZCM-FPR128-CPU: fmov s0, [[REG1]]
57+
; NOZCM-FPR128-CPU: fmov s1, [[REG2]]
1558

16-
; NOTCPU-APPLE: fmov s0, s2
17-
; NOTCPU-APPLE: fmov s1, s3
18-
; NOTCPU-APPLE: fmov [[REG2:s[0-9]+]], s3
19-
; NOTCPU-APPLE: fmov [[REG1:s[0-9]+]], s2
20-
; NOTCPU-APPLE-NEXT: bl {{_?foo_float}}
21-
; NOTCPU-APPLE: fmov s0, [[REG1]]
22-
; NOTCPU-APPLE: fmov s1, [[REG2]]
59+
; ZCM-FPR128-CPU: mov.16b [[REG2:v[0-9]+]], v3
60+
; ZCM-FPR128-CPU: mov.16b [[REG1:v[0-9]+]], v2
61+
; ZCM-FPR128-CPU: mov.16b v0, v2
62+
; ZCM-FPR128-CPU: mov.16b v1, v3
63+
; ZCM-FPR128-CPU-NEXT: bl {{_?foo_float}}
64+
; ZCM-FPR128-CPU: mov.16b v0, [[REG1]]
65+
; ZCM-FPR128-CPU: mov.16b v1, [[REG2]]
2366

24-
; ATTR: fmov d0, d2
25-
; ATTR: fmov d1, d3
26-
; ATTR: fmov [[REG2:d[0-9]+]], d3
27-
; ATTR: fmov [[REG1:d[0-9]+]], d2
28-
; ATTR-NEXT: bl {{_?foo_float}}
29-
; ATTR: fmov d0, [[REG1]]
30-
; ATTR: fmov d1, [[REG2]]
67+
; NOZCM-FPR128-ATTR: fmov [[REG2:s[0-9]+]], s3
68+
; NOZCM-FPR128-ATTR: fmov [[REG1:s[0-9]+]], s2
69+
; NOZCM-FPR128-ATTR: fmov s0, s2
70+
; NOZCM-FPR128-ATTR: fmov s1, s3
71+
; NOZCM-FPR128-ATTR-NEXT: bl {{_?foo_float}}
72+
; NOZCM-FPR128-ATTR: fmov s0, [[REG1]]
73+
; NOZCM-FPR128-ATTR: fmov s1, [[REG2]]
74+
75+
; ZCM-FPR128-ATTR: mov.16b v0, v2
76+
; ZCM-FPR128-ATTR: mov.16b v1, v3
77+
; ZCM-FPR128-ATTR: mov.16b [[REG2:v[0-9]+]], v3
78+
; ZCM-FPR128-ATTR: mov.16b [[REG1:v[0-9]+]], v2
79+
; ZCM-FPR128-ATTR-NEXT: bl {{_?foo_float}}
80+
; ZCM-FPR128-ATTR: mov.16b v0, [[REG1]]
81+
; ZCM-FPR128-ATTR: mov.16b v1, [[REG2]]
3182
%call = call float @foo_float(float %c, float %d)
3283
%call1 = call float @foo_float(float %c, float %d)
3384
unreachable
@@ -38,29 +89,37 @@ declare float @foo_float(float, float)
3889
define void @zero_cycle_regmov_FPR16(half %a, half %b, half %c, half %d) {
3990
entry:
4091
; CHECK-LABEL: t:
41-
; NOTCPU-LINUX: fmov s0, s2
42-
; NOTCPU-LINUX: fmov s1, s3
43-
; NOTCPU-LINUX: fmov [[REG2:s[0-9]+]], s3
44-
; NOTCPU-LINUX: fmov [[REG1:s[0-9]+]], s2
45-
; NOTCPU-LINUX-NEXT: bl {{_?foo_half}}
46-
; NOTCPU-LINUX: fmov s0, [[REG1]]
47-
; NOTCPU-LINUX: fmov s1, [[REG2]]
92+
; NOZCM-FPR128-CPU: fmov s0, s2
93+
; NOZCM-FPR128-CPU: fmov s1, s3
94+
; NOZCM-FPR128-CPU: fmov [[REG2:s[0-9]+]], s3
95+
; NOZCM-FPR128-CPU: fmov [[REG1:s[0-9]+]], s2
96+
; NOZCM-FPR128-CPU-NEXT: bl {{_?foo_half}}
97+
; NOZCM-FPR128-CPU: fmov s0, [[REG1]]
98+
; NOZCM-FPR128-CPU: fmov s1, [[REG2]]
99+
100+
; ZCM-FPR128-CPU: mov.16b [[REG2:v[0-9]+]], v3
101+
; ZCM-FPR128-CPU: mov.16b [[REG1:v[0-9]+]], v2
102+
; ZCM-FPR128-CPU: mov.16b v0, v2
103+
; ZCM-FPR128-CPU: mov.16b v1, v3
104+
; ZCM-FPR128-CPU-NEXT: bl {{_?foo_half}}
105+
; ZCM-FPR128-CPU: mov.16b v0, [[REG1]]
106+
; ZCM-FPR128-CPU: mov.16b v1, [[REG2]]
48107

49-
; NOTCPU-APPLE: fmov s0, s2
50-
; NOTCPU-APPLE: fmov s1, s3
51-
; NOTCPU-APPLE: fmov [[REG2:s[0-9]+]], s3
52-
; NOTCPU-APPLE: fmov [[REG1:s[0-9]+]], s2
53-
; NOTCPU-APPLE-NEXT: bl {{_?foo_half}}
54-
; NOTCPU-APPLE: fmov s0, [[REG1]]
55-
; NOTCPU-APPLE: fmov s1, [[REG2]]
108+
; NOZCM-FPR128-ATTR: fmov [[REG2:s[0-9]+]], s3
109+
; NOZCM-FPR128-ATTR: fmov [[REG1:s[0-9]+]], s2
110+
; NOZCM-FPR128-ATTR: fmov s0, s2
111+
; NOZCM-FPR128-ATTR: fmov s1, s3
112+
; NOZCM-FPR128-ATTR-NEXT: bl {{_?foo_half}}
113+
; NOZCM-FPR128-ATTR: fmov s0, [[REG1]]
114+
; NOZCM-FPR128-ATTR: fmov s1, [[REG2]]
56115

57-
; ATTR: fmov d0, d2
58-
; ATTR: fmov d1, d3
59-
; ATTR: fmov [[REG2:d[0-9]+]], d3
60-
; ATTR: fmov [[REG1:d[0-9]+]], d2
61-
; ATTR-NEXT: bl {{_?foo_half}}
62-
; ATTR: fmov d0, [[REG1]]
63-
; ATTR: fmov d1, [[REG2]]
116+
; ZCM-FPR128-ATTR: mov.16b v0, v2
117+
; ZCM-FPR128-ATTR: mov.16b v1, v3
118+
; ZCM-FPR128-ATTR: mov.16b [[REG2:v[0-9]+]], v3
119+
; ZCM-FPR128-ATTR: mov.16b [[REG1:v[0-9]+]], v2
120+
; ZCM-FPR128-ATTR-NEXT: bl {{_?foo_half}}
121+
; ZCM-FPR128-ATTR: mov.16b v0, [[REG1]]
122+
; ZCM-FPR128-ATTR: mov.16b v1, [[REG2]]
64123
%call = call half @foo_half(half %c, half %d)
65124
%call1 = call half @foo_half(half %c, half %d)
66125
unreachable

0 commit comments

Comments
 (0)