Skip to content

Commit 9390b85

Browse files
committed
[ARM] Use half directly for args/return types in test. NFC
Until fairly recently, the calling convention for the IR half type was not handled correctly in the ARM backend, so these tests had to pass half values through pointers that were loaded/stored. Now that this is fixed, we can switch to using the half type directly instead.
1 parent 93eef7d commit 9390b85

File tree

9 files changed

+2874
-3126
lines changed

9 files changed

+2874
-3126
lines changed

llvm/test/CodeGen/Thumb2/mve-float16regloops.ll

Lines changed: 85 additions & 108 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/Thumb2/mve-float32regloops.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -696,7 +696,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry
696696
}
697697

698698

699-
define dso_local void @test_nested(float* noalias nocapture %pInT1, float* noalias nocapture readonly %pOutT1, float* noalias nocapture readonly %pPRT_in, float* noalias nocapture readnone %pPRT_pDst, i32 %numRows, i32 %numCols, i32 %l, float %in) local_unnamed_addr #0 {
699+
define dso_local void @test_nested(float* noalias nocapture %pInT1, float* noalias nocapture readonly %pOutT1, float* noalias nocapture readonly %pPRT_in, float* noalias nocapture readnone %pPRT_pDst, i32 %numRows, i32 %numCols, i32 %l) local_unnamed_addr #0 {
700700
; CHECK-LABEL: test_nested:
701701
; CHECK: @ %bb.0: @ %for.body.us.preheader
702702
; CHECK-NEXT: .save {r4, r5, r6, lr}

llvm/test/CodeGen/Thumb2/mve-shuffle.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -512,14 +512,12 @@ entry:
512512
ret <4 x float> %res
513513
}
514514

515-
; TODO: Calling convention needs fixing to pass half types directly to functions
516-
define arm_aapcs_vfpcc <8 x half> @insert_f16(half *%aa) {
515+
define arm_aapcs_vfpcc <8 x half> @insert_f16(half %a) {
517516
; CHECK-LABEL: insert_f16:
518517
; CHECK: @ %bb.0: @ %entry
519-
; CHECK-NEXT: vldr.16 s0, [r0]
518+
; CHECK-NEXT: @ kill: def $s0 killed $s0 def $q0
520519
; CHECK-NEXT: bx lr
521520
entry:
522-
%a = load half, half* %aa
523521
%res = insertelement <8 x half> undef, half %a, i32 0
524522
ret <8 x half> %res
525523
}

llvm/test/CodeGen/Thumb2/mve-vaddqr.ll

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -86,14 +86,13 @@ entry:
8686
ret <4 x float> %c
8787
}
8888

89-
define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16(<8 x half> %src, half *%src2p, <8 x half> %a, <8 x half> %b) {
89+
define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
9090
; CHECK-LABEL: vaddqr_v8f16:
9191
; CHECK: @ %bb.0: @ %entry
92-
; CHECK-NEXT: ldrh r0, [r0]
92+
; CHECK-NEXT: vmov.f16 r0, s4
9393
; CHECK-NEXT: vadd.f16 q0, q0, r0
9494
; CHECK-NEXT: bx lr
9595
entry:
96-
%src2 = load half, half *%src2p, align 2
9796
%i = insertelement <8 x half> undef, half %src2, i32 0
9897
%sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
9998
%c = fadd <8 x half> %src, %sp
@@ -113,14 +112,13 @@ entry:
113112
ret <4 x float> %c
114113
}
115114

116-
define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_2(<8 x half> %src, half *%src2p, <8 x half> %a, <8 x half> %b) {
115+
define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_2(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
117116
; CHECK-LABEL: vaddqr_v8f16_2:
118117
; CHECK: @ %bb.0: @ %entry
119-
; CHECK-NEXT: ldrh r0, [r0]
118+
; CHECK-NEXT: vmov.f16 r0, s4
120119
; CHECK-NEXT: vadd.f16 q0, q0, r0
121120
; CHECK-NEXT: bx lr
122121
entry:
123-
%src2 = load half, half *%src2p, align 2
124122
%i = insertelement <8 x half> undef, half %src2, i32 0
125123
%sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
126124
%c = fadd <8 x half> %sp, %src
@@ -142,14 +140,13 @@ entry:
142140
ret <4 x float> %c
143141
}
144142

145-
define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_3(<8 x half> %src, half *%src2p, <8 x half> %a, <8 x half> %b) {
143+
define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_3(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
146144
; CHECK-LABEL: vaddqr_v8f16_3:
147145
; CHECK: @ %bb.0: @ %entry
148-
; CHECK-NEXT: ldrh r0, [r0]
146+
; CHECK-NEXT: vmov.f16 r0, s4
149147
; CHECK-NEXT: vadd.f16 q0, q0, r0
150148
; CHECK-NEXT: bx lr
151149
entry:
152-
%src2 = load half, half *%src2p, align 2
153150
%src2bc = bitcast half %src2 to i16
154151
%i = insertelement <8 x i16> undef, i16 %src2bc, i32 0
155152
%spbc = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
@@ -173,14 +170,13 @@ entry:
173170
ret <4 x float> %c
174171
}
175172

176-
define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_4(<8 x half> %src, half *%src2p, <8 x half> %a, <8 x half> %b) {
173+
define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_4(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
177174
; CHECK-LABEL: vaddqr_v8f16_4:
178175
; CHECK: @ %bb.0: @ %entry
179-
; CHECK-NEXT: ldrh r0, [r0]
176+
; CHECK-NEXT: vmov.f16 r0, s4
180177
; CHECK-NEXT: vadd.f16 q0, q0, r0
181178
; CHECK-NEXT: bx lr
182179
entry:
183-
%src2 = load half, half *%src2p, align 2
184180
%src2bc = bitcast half %src2 to i16
185181
%i = insertelement <8 x i16> undef, i16 %src2bc, i32 0
186182
%spbc = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer

0 commit comments

Comments
 (0)