intel
diff --git a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
Lines changed: 85 additions & 108 deletions
diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
Lines changed: 1 addition & 1 deletion
diff --git a/llvm/test/CodeGen/Thumb2/mve-shuffle.ll b/llvm/test/CodeGen/Thumb2/mve-shuffle.ll
Lines changed: 2 additions & 4 deletions
diff --git a/llvm/test/CodeGen/Thumb2/mve-vaddqr.ll b/llvm/test/CodeGen/Thumb2/mve-vaddqr.ll
Lines changed: 8 additions & 12 deletions
@@ -696,7 +696,7 @@ for.cond.cleanup:                                 ; preds = %vector.body, %entry
 }
 
 
-define dso_local void @test_nested(float* noalias nocapture %pInT1, float* noalias nocapture readonly %pOutT1, float* noalias nocapture readonly %pPRT_in, float* noalias nocapture readnone %pPRT_pDst, i32 %numRows, i32 %numCols, i32 %l, float %in) local_unnamed_addr #0 {
+define dso_local void @test_nested(float* noalias nocapture %pInT1, float* noalias nocapture readonly %pOutT1, float* noalias nocapture readonly %pPRT_in, float* noalias nocapture readnone %pPRT_pDst, i32 %numRows, i32 %numCols, i32 %l) local_unnamed_addr #0 {
 ; CHECK-LABEL: test_nested:
 ; CHECK:       @ %bb.0: @ %for.body.us.preheader
 ; CHECK-NEXT:    .save {r4, r5, r6, lr}
 
@@ -512,14 +512,12 @@ entry:
   ret <4 x float> %res
 }
 
-; TODO: Calling convention needs fixing to pass half types directly to functions
-define arm_aapcs_vfpcc <8 x half> @insert_f16(half *%aa) {
+define arm_aapcs_vfpcc <8 x half> @insert_f16(half %a) {
 ; CHECK-LABEL: insert_f16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldr.16 s0, [r0]
+; CHECK-NEXT:    @ kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    bx lr
 entry:
-  %a = load half, half* %aa
   %res = insertelement <8 x half> undef, half %a, i32 0
   ret <8 x half> %res
 }
 
@@ -86,14 +86,13 @@ entry:
   ret <4 x float> %c
 }
 
-define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16(<8 x half> %src, half *%src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-LABEL: vaddqr_v8f16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    ldrh r0, [r0]
+; CHECK-NEXT:    vmov.f16 r0, s4
 ; CHECK-NEXT:    vadd.f16 q0, q0, r0
 ; CHECK-NEXT:    bx lr
 entry:
-  %src2 = load half, half *%src2p, align 2
   %i = insertelement <8 x half> undef, half %src2, i32 0
   %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
   %c = fadd <8 x half> %src, %sp
@@ -113,14 +112,13 @@ entry:
   ret <4 x float> %c
 }
 
-define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_2(<8 x half> %src, half *%src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_2(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-LABEL: vaddqr_v8f16_2:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    ldrh r0, [r0]
+; CHECK-NEXT:    vmov.f16 r0, s4
 ; CHECK-NEXT:    vadd.f16 q0, q0, r0
 ; CHECK-NEXT:    bx lr
 entry:
-  %src2 = load half, half *%src2p, align 2
   %i = insertelement <8 x half> undef, half %src2, i32 0
   %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
   %c = fadd <8 x half> %sp, %src
@@ -142,14 +140,13 @@ entry:
   ret <4 x float> %c
 }
 
-define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_3(<8 x half> %src, half *%src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_3(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-LABEL: vaddqr_v8f16_3:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    ldrh r0, [r0]
+; CHECK-NEXT:    vmov.f16 r0, s4
 ; CHECK-NEXT:    vadd.f16 q0, q0, r0
 ; CHECK-NEXT:    bx lr
 entry:
-  %src2 = load half, half *%src2p, align 2
   %src2bc = bitcast half %src2 to i16
   %i = insertelement <8 x i16> undef, i16 %src2bc, i32 0
   %spbc = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
@@ -173,14 +170,13 @@ entry:
   ret <4 x float> %c
 }
 
-define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_4(<8 x half> %src, half *%src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_4(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-LABEL: vaddqr_v8f16_4:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    ldrh r0, [r0]
+; CHECK-NEXT:    vmov.f16 r0, s4
 ; CHECK-NEXT:    vadd.f16 q0, q0, r0
 ; CHECK-NEXT:    bx lr
 entry:
-  %src2 = load half, half *%src2p, align 2
   %src2bc = bitcast half %src2 to i16
   %i = insertelement <8 x i16> undef, i16 %src2bc, i32 0
   %spbc = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
Original file line number	Diff line number	Diff line change
`@@ -696,7 +696,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry`
`696`	`696`	`}`
`697`	`697`
`698`	`698`
`699`		`-define dso_local void @test_nested(float* noalias nocapture %pInT1, float* noalias nocapture readonly %pOutT1, float* noalias nocapture readonly %pPRT_in, float* noalias nocapture readnone %pPRT_pDst, i32 %numRows, i32 %numCols, i32 %l, float %in) local_unnamed_addr #0 {`
	`699`	`+define dso_local void @test_nested(float* noalias nocapture %pInT1, float* noalias nocapture readonly %pOutT1, float* noalias nocapture readonly %pPRT_in, float* noalias nocapture readnone %pPRT_pDst, i32 %numRows, i32 %numCols, i32 %l) local_unnamed_addr #0 {`
`700`	`700`	`; CHECK-LABEL: test_nested:`
`701`	`701`	`; CHECK: @ %bb.0: @ %for.body.us.preheader`
`702`	`702`	`; CHECK-NEXT: .save {r4, r5, r6, lr}`