1- ; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s -check-prefixes=NOTCPU-LINUX --match-full-lines
2- ; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=generic | FileCheck %s -check-prefixes=NOTCPU-APPLE --match-full-lines
3- ; RUN: llc < %s -mtriple=arm64-apple-macosx -mattr=+zcm-fpr64 | FileCheck %s -check-prefixes=ATTR --match-full-lines
1+ ; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s -check-prefixes=NOZCM-FPR128-CPU --match-full-lines
2+ ; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=generic | FileCheck %s -check-prefixes=NOZCM-FPR128-CPU --match-full-lines
3+ ; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=apple-m1 | FileCheck %s -check-prefixes=ZCM-FPR128-CPU --match-full-lines
4+ ; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=apple-m1 -mattr=-zcm-fpr128 | FileCheck %s -check-prefixes=NOZCM-FPR128-ATTR --match-full-lines
5+ ; RUN: llc < %s -mtriple=arm64-apple-macosx -mattr=+zcm-fpr128 | FileCheck %s -check-prefixes=ZCM-FPR128-ATTR --match-full-lines
6+
; Checks how copies of double (FPR64) values are emitted around two
; back-to-back calls that take the same arguments. The check lines below
; expect %c and %d (d2/d3) to be copied into d0/d1 and into two further
; registers before the first call, and copied back into d0/d1 after it:
;   - the NOZCM-FPR128 runs expect 64-bit `fmov dN, dM` copies;
;   - the ZCM-FPR128 runs expect full-width `mov.16b vN, vM` copies.
define void @zero_cycle_regmov_FPR64(double %a, double %b, double %c, double %d) {
entry:
; Anchor each enabled prefix to this function. The RUN lines select prefixes
; with -check-prefixes, so a directive using the default CHECK prefix is never
; enabled and is silently ignored. {{_?}} accepts the Mach-O leading
; underscore; {{.*}} absorbs the trailing assembler comment, which is needed
; because the RUN lines pass --match-full-lines.
; NOZCM-FPR128-CPU-LABEL: {{_?}}zero_cycle_regmov_FPR64:{{.*}}
; ZCM-FPR128-CPU-LABEL: {{_?}}zero_cycle_regmov_FPR64:{{.*}}
; NOZCM-FPR128-ATTR-LABEL: {{_?}}zero_cycle_regmov_FPR64:{{.*}}
; ZCM-FPR128-ATTR-LABEL: {{_?}}zero_cycle_regmov_FPR64:{{.*}}

; NOZCM-FPR128-CPU: fmov d0, d2
; NOZCM-FPR128-CPU: fmov d1, d3
; NOZCM-FPR128-CPU: fmov [[REG2:d[0-9]+]], d3
; NOZCM-FPR128-CPU: fmov [[REG1:d[0-9]+]], d2
; NOZCM-FPR128-CPU-NEXT: bl {{_?foo_double}}
; NOZCM-FPR128-CPU: fmov d0, [[REG1]]
; NOZCM-FPR128-CPU: fmov d1, [[REG2]]

; ZCM-FPR128-CPU: mov.16b [[REG2:v[0-9]+]], v3
; ZCM-FPR128-CPU: mov.16b [[REG1:v[0-9]+]], v2
; ZCM-FPR128-CPU: mov.16b v0, v2
; ZCM-FPR128-CPU: mov.16b v1, v3
; ZCM-FPR128-CPU-NEXT: bl {{_?foo_double}}
; ZCM-FPR128-CPU: mov.16b v0, [[REG1]]
; ZCM-FPR128-CPU: mov.16b v1, [[REG2]]

; NOZCM-FPR128-ATTR: fmov [[REG2:d[0-9]+]], d3
; NOZCM-FPR128-ATTR: fmov [[REG1:d[0-9]+]], d2
; NOZCM-FPR128-ATTR: fmov d0, d2
; NOZCM-FPR128-ATTR: fmov d1, d3
; NOZCM-FPR128-ATTR-NEXT: bl {{_?foo_double}}
; NOZCM-FPR128-ATTR: fmov d0, [[REG1]]
; NOZCM-FPR128-ATTR: fmov d1, [[REG2]]

; ZCM-FPR128-ATTR: mov.16b v0, v2
; ZCM-FPR128-ATTR: mov.16b v1, v3
; ZCM-FPR128-ATTR: mov.16b [[REG2:v[0-9]+]], v3
; ZCM-FPR128-ATTR: mov.16b [[REG1:v[0-9]+]], v2
; ZCM-FPR128-ATTR-NEXT: bl {{_?foo_double}}
; ZCM-FPR128-ATTR: mov.16b v0, [[REG1]]
; ZCM-FPR128-ATTR: mov.16b v1, [[REG2]]
  ; Two calls with identical arguments: %c and %d must be placed in the
  ; argument registers twice and stay live across the first call, which is
  ; what forces the register-to-register copies checked above.
  %call = call double @foo_double(double %c, double %d)
  %call1 = call double @foo_double(double %c, double %d)
  unreachable
}
45+
; External callee used by @zero_cycle_regmov_FPR64. It is declared as
; returning double so that the declaration agrees with the
; `call double @foo_double(double, double)` call sites.
declare double @foo_double(double, double)
447
548define void @zero_cycle_regmov_FPR32 (float %a , float %b , float %c , float %d ) {
649entry:
750; CHECK-LABEL: t:
8- ; NOTCPU-LINUX : fmov s0, s2
9- ; NOTCPU-LINUX : fmov s1, s3
10- ; NOTCPU-LINUX : fmov [[REG2:s[0-9]+]], s3
11- ; NOTCPU-LINUX : fmov [[REG1:s[0-9]+]], s2
12- ; NOTCPU-LINUX -NEXT: bl {{_?foo_float}}
13- ; NOTCPU-LINUX : fmov s0, [[REG1]]
14- ; NOTCPU-LINUX : fmov s1, [[REG2]]
51+ ; NOZCM-FPR128-CPU: fmov s0, s2
52+ ; NOZCM-FPR128-CPU: fmov s1, s3
53+ ; NOZCM-FPR128-CPU: fmov [[REG2:s[0-9]+]], s3
54+ ; NOZCM-FPR128-CPU: fmov [[REG1:s[0-9]+]], s2
55+ ; NOZCM-FPR128-CPU-NEXT: bl {{_?foo_float}}
56+ ; NOZCM-FPR128-CPU: fmov s0, [[REG1]]
57+ ; NOZCM-FPR128-CPU: fmov s1, [[REG2]]
1558
16- ; NOTCPU-APPLE: fmov s0, s2
17- ; NOTCPU-APPLE: fmov s1, s3
18- ; NOTCPU-APPLE: fmov [[REG2:s[0-9]+]], s3
19- ; NOTCPU-APPLE: fmov [[REG1:s[0-9]+]], s2
20- ; NOTCPU-APPLE -NEXT: bl {{_?foo_float}}
21- ; NOTCPU-APPLE: fmov s0 , [[REG1]]
22- ; NOTCPU-APPLE: fmov s1 , [[REG2]]
59+ ; ZCM-FPR128-CPU: mov.16b [[REG2:v[0-9]+]], v3
60+ ; ZCM-FPR128-CPU: mov.16b [[REG1:v[0-9]+]], v2
61+ ; ZCM-FPR128-CPU: mov.16b v0, v2
62+ ; ZCM-FPR128-CPU: mov.16b v1, v3
63+ ; ZCM-FPR128-CPU-NEXT: bl {{_?foo_float}}
64+ ; ZCM-FPR128-CPU: mov.16b v0, [[REG1]]
65+ ; ZCM-FPR128-CPU: mov.16b v1, [[REG2]]
2366
24- ; ATTR: fmov d0, d2
25- ; ATTR: fmov d1, d3
26- ; ATTR: fmov [[REG2:d[0-9]+]], d3
27- ; ATTR: fmov [[REG1:d[0-9]+]], d2
28- ; ATTR-NEXT: bl {{_?foo_float}}
29- ; ATTR: fmov d0, [[REG1]]
30- ; ATTR: fmov d1, [[REG2]]
67+ ; NOZCM-FPR128-ATTR: fmov [[REG2:s[0-9]+]], s3
68+ ; NOZCM-FPR128-ATTR: fmov [[REG1:s[0-9]+]], s2
69+ ; NOZCM-FPR128-ATTR: fmov s0, s2
70+ ; NOZCM-FPR128-ATTR: fmov s1, s3
71+ ; NOZCM-FPR128-ATTR-NEXT: bl {{_?foo_float}}
72+ ; NOZCM-FPR128-ATTR: fmov s0, [[REG1]]
73+ ; NOZCM-FPR128-ATTR: fmov s1, [[REG2]]
74+
75+ ; ZCM-FPR128-ATTR: mov.16b v0, v2
76+ ; ZCM-FPR128-ATTR: mov.16b v1, v3
77+ ; ZCM-FPR128-ATTR: mov.16b [[REG2:v[0-9]+]], v3
78+ ; ZCM-FPR128-ATTR: mov.16b [[REG1:v[0-9]+]], v2
79+ ; ZCM-FPR128-ATTR-NEXT: bl {{_?foo_float}}
80+ ; ZCM-FPR128-ATTR: mov.16b v0, [[REG1]]
81+ ; ZCM-FPR128-ATTR: mov.16b v1, [[REG2]]
3182 %call = call float @foo_float (float %c , float %d )
3283 %call1 = call float @foo_float (float %c , float %d )
3384 unreachable
@@ -38,29 +89,37 @@ declare float @foo_float(float, float)
3889define void @zero_cycle_regmov_FPR16 (half %a , half %b , half %c , half %d ) {
3990entry:
4091; CHECK-LABEL: t:
41- ; NOTCPU-LINUX: fmov s0, s2
42- ; NOTCPU-LINUX: fmov s1, s3
43- ; NOTCPU-LINUX: fmov [[REG2:s[0-9]+]], s3
44- ; NOTCPU-LINUX: fmov [[REG1:s[0-9]+]], s2
45- ; NOTCPU-LINUX-NEXT: bl {{_?foo_half}}
46- ; NOTCPU-LINUX: fmov s0, [[REG1]]
47- ; NOTCPU-LINUX: fmov s1, [[REG2]]
92+ ; NOZCM-FPR128-CPU: fmov s0, s2
93+ ; NOZCM-FPR128-CPU: fmov s1, s3
94+ ; NOZCM-FPR128-CPU: fmov [[REG2:s[0-9]+]], s3
95+ ; NOZCM-FPR128-CPU: fmov [[REG1:s[0-9]+]], s2
96+ ; NOZCM-FPR128-CPU-NEXT: bl {{_?foo_half}}
97+ ; NOZCM-FPR128-CPU: fmov s0, [[REG1]]
98+ ; NOZCM-FPR128-CPU: fmov s1, [[REG2]]
99+
100+ ; ZCM-FPR128-CPU: mov.16b [[REG2:v[0-9]+]], v3
101+ ; ZCM-FPR128-CPU: mov.16b [[REG1:v[0-9]+]], v2
102+ ; ZCM-FPR128-CPU: mov.16b v0, v2
103+ ; ZCM-FPR128-CPU: mov.16b v1, v3
104+ ; ZCM-FPR128-CPU-NEXT: bl {{_?foo_half}}
105+ ; ZCM-FPR128-CPU: mov.16b v0, [[REG1]]
106+ ; ZCM-FPR128-CPU: mov.16b v1, [[REG2]]
48107
49- ; NOTCPU-APPLE : fmov s0, s2
50- ; NOTCPU-APPLE : fmov s1, s3
51- ; NOTCPU-APPLE : fmov [[REG2:s[0-9]+]], s3
52- ; NOTCPU-APPLE : fmov [[REG1:s[0-9]+]], s2
53- ; NOTCPU-APPLE -NEXT: bl {{_?foo_half}}
54- ; NOTCPU-APPLE : fmov s0, [[REG1]]
55- ; NOTCPU-APPLE : fmov s1, [[REG2]]
108+ ; NOZCM-FPR128-ATTR: fmov [[REG2:s[0-9]+]], s3
109+ ; NOZCM-FPR128-ATTR: fmov [[REG1:s[0-9]+]], s2
110+ ; NOZCM-FPR128-ATTR: fmov s0, s2
111+ ; NOZCM-FPR128-ATTR: fmov s1, s3
112+ ; NOZCM-FPR128-ATTR-NEXT: bl {{_?foo_half}}
113+ ; NOZCM-FPR128-ATTR: fmov s0, [[REG1]]
114+ ; NOZCM-FPR128-ATTR: fmov s1, [[REG2]]
56115
57- ; ATTR: fmov d0, d2
58- ; ATTR: fmov d1, d3
59- ; ATTR: fmov [[REG2:d [0-9]+]], d3
60- ; ATTR: fmov [[REG1:d [0-9]+]], d2
61- ; ATTR-NEXT: bl {{_?foo_half}}
62- ; ATTR: fmov d0 , [[REG1]]
63- ; ATTR: fmov d1 , [[REG2]]
116+ ; ZCM-FPR128-ATTR: mov.16b v0, v2
117+ ; ZCM-FPR128-ATTR: mov.16b v1, v3
118+ ; ZCM-FPR128-ATTR: mov.16b [[REG2:v[0-9]+]], v3
119+ ; ZCM-FPR128-ATTR: mov.16b [[REG1:v[0-9]+]], v2
120+ ; ZCM-FPR128-ATTR-NEXT: bl {{_?foo_half}}
121+ ; ZCM-FPR128-ATTR: mov.16b v0, [[REG1]]
122+ ; ZCM-FPR128-ATTR: mov.16b v1, [[REG2]]
64123 %call = call half @foo_half (half %c , half %d )
65124 %call1 = call half @foo_half (half %c , half %d )
66125 unreachable
0 commit comments