22; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+b | FileCheck %s --check-prefixes=CHECK,V
33; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+b,+zvfh | FileCheck %s --check-prefixes=CHECK,ZVFH
44
5- declare i32 @llvm.experimental.constrained.fptosi.i32.f64 (double , metadata )
65declare void @g ()
76
8- ; TODO: Merging scalars into vectors is unprofitable because we have no
9- ; vector CSRs which creates additional spills around the call.
107define void @f (ptr %m , ptr %n , ptr %p , ptr %q , ptr %r , ptr %s , double %t ) {
118; CHECK-LABEL: f:
129; CHECK: # %bb.0:
@@ -16,40 +13,40 @@ define void @f(ptr %m, ptr %n, ptr %p, ptr %q, ptr %r, ptr %s, double %t) {
1613; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
1714; CHECK-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
1815; CHECK-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
16+ ; CHECK-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
17+ ; CHECK-NEXT: sd s4, 0(sp) # 8-byte Folded Spill
1918; CHECK-NEXT: .cfi_offset ra, -8
2019; CHECK-NEXT: .cfi_offset s0, -16
2120; CHECK-NEXT: .cfi_offset s1, -24
2221; CHECK-NEXT: .cfi_offset s2, -32
23- ; CHECK-NEXT: csrr a6, vlenb
24- ; CHECK-NEXT: sub sp, sp, a6
25- ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 1 * vlenb
22+ ; CHECK-NEXT: .cfi_offset s3, -40
23+ ; CHECK-NEXT: .cfi_offset s4, -48
2624; CHECK-NEXT: mv s0, a5
2725; CHECK-NEXT: mv s1, a4
2826; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2927; CHECK-NEXT: vle64.v v8, (a0)
3028; CHECK-NEXT: vse64.v v8, (a1)
31- ; CHECK-NEXT: vle64.v v8, (a2)
32- ; CHECK-NEXT: addi a0, sp, 16
33- ; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
29+ ; CHECK-NEXT: ld s3, 0(a2)
30+ ; CHECK-NEXT: ld s4, 8(a2)
3431; CHECK-NEXT: mv s2, a3
3532; CHECK-NEXT: call g
36- ; CHECK-NEXT: addi a0, sp, 16
37- ; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
33+ ; CHECK-NEXT: sd s3, 0(s2)
34+ ; CHECK-NEXT: sd s4, 8(s2)
3835; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
39- ; CHECK-NEXT: vse64.v v8, (s2)
4036; CHECK-NEXT: vle64.v v8, (s1)
4137; CHECK-NEXT: vse64.v v8, (s0)
42- ; CHECK-NEXT: csrr a0, vlenb
43- ; CHECK-NEXT: add sp, sp, a0
44- ; CHECK-NEXT: .cfi_def_cfa sp, 48
4538; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
4639; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
4740; CHECK-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
4841; CHECK-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
42+ ; CHECK-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
43+ ; CHECK-NEXT: ld s4, 0(sp) # 8-byte Folded Reload
4944; CHECK-NEXT: .cfi_restore ra
5045; CHECK-NEXT: .cfi_restore s0
5146; CHECK-NEXT: .cfi_restore s1
5247; CHECK-NEXT: .cfi_restore s2
48+ ; CHECK-NEXT: .cfi_restore s3
49+ ; CHECK-NEXT: .cfi_restore s4
5350; CHECK-NEXT: addi sp, sp, 48
5451; CHECK-NEXT: .cfi_def_cfa_offset 0
5552; CHECK-NEXT: ret
@@ -78,13 +75,13 @@ define void @f(ptr %m, ptr %n, ptr %p, ptr %q, ptr %r, ptr %s, double %t) {
7875 ret void
7976}
8077
81- define void @f1 (ptr %m , ptr %n , ptr % p , ptr %q , ptr %r , ptr %s , double %t ) {
78+ define void @f1 (ptr %p , ptr %q , double %t ) {
8279; CHECK-LABEL: f1:
8380; CHECK: # %bb.0:
8481; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
85- ; CHECK-NEXT: vle64.v v8, (a2 )
82+ ; CHECK-NEXT: vle64.v v8, (a0 )
8683; CHECK-NEXT: fcvt.wu.d a0, fa0, rtz
87- ; CHECK-NEXT: vse64.v v8, (a3 )
84+ ; CHECK-NEXT: vse64.v v8, (a1 )
8885; CHECK-NEXT: ret
8986 %x0 = load i64 , ptr %p
9087 %p.1 = getelementptr i64 , ptr %p , i64 1
@@ -93,7 +90,6 @@ define void @f1(ptr %m, ptr %n, ptr %p, ptr %q, ptr %r, ptr %s, double %t) {
9390 store i64 %x0 , ptr %q
9491 %q.1 = getelementptr i64 , ptr %q , i64 1
9592 store i64 %x1 , ptr %q.1
96-
9793 ret void
9894}
9995
@@ -515,28 +511,26 @@ define void @two_half_unaligned(ptr %p, ptr %q) {
515511; ZVFH-NEXT: .cfi_def_cfa_offset 32
516512; ZVFH-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
517513; ZVFH-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
514+ ; ZVFH-NEXT: fsd fs0, 8(sp) # 8-byte Folded Spill
515+ ; ZVFH-NEXT: fsd fs1, 0(sp) # 8-byte Folded Spill
518516; ZVFH-NEXT: .cfi_offset ra, -8
519517; ZVFH-NEXT: .cfi_offset s0, -16
520- ; ZVFH-NEXT: csrr a2, vlenb
521- ; ZVFH-NEXT: sub sp, sp, a2
522- ; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 1 * vlenb
523- ; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
524- ; ZVFH-NEXT: vle16.v v8, (a0)
525- ; ZVFH-NEXT: addi a0, sp, 16
526- ; ZVFH-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
518+ ; ZVFH-NEXT: .cfi_offset fs0, -24
519+ ; ZVFH-NEXT: .cfi_offset fs1, -32
520+ ; ZVFH-NEXT: flh fs0, 0(a0)
521+ ; ZVFH-NEXT: flh fs1, 2(a0)
527522; ZVFH-NEXT: mv s0, a1
528523; ZVFH-NEXT: call g
529- ; ZVFH-NEXT: addi a0, sp, 16
530- ; ZVFH-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
531- ; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
532- ; ZVFH-NEXT: vse16.v v8, (s0)
533- ; ZVFH-NEXT: csrr a0, vlenb
534- ; ZVFH-NEXT: add sp, sp, a0
535- ; ZVFH-NEXT: .cfi_def_cfa sp, 32
524+ ; ZVFH-NEXT: fsh fs0, 0(s0)
525+ ; ZVFH-NEXT: fsh fs1, 2(s0)
536526; ZVFH-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
537527; ZVFH-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
528+ ; ZVFH-NEXT: fld fs0, 8(sp) # 8-byte Folded Reload
529+ ; ZVFH-NEXT: fld fs1, 0(sp) # 8-byte Folded Reload
538530; ZVFH-NEXT: .cfi_restore ra
539531; ZVFH-NEXT: .cfi_restore s0
532+ ; ZVFH-NEXT: .cfi_restore fs0
533+ ; ZVFH-NEXT: .cfi_restore fs1
540534; ZVFH-NEXT: addi sp, sp, 32
541535; ZVFH-NEXT: .cfi_def_cfa_offset 0
542536; ZVFH-NEXT: ret
@@ -552,9 +546,6 @@ define void @two_half_unaligned(ptr %p, ptr %q) {
552546 ret void
553547}
554548
555-
556- ; TODO: This one is currently a vector which is unprofitable, we should
557- ; use i64 instead.
558549define void @two_float (ptr %p , ptr %q ) {
559550; CHECK-LABEL: two_float:
560551; CHECK: # %bb.0:
@@ -598,28 +589,26 @@ define void @two_float_unaligned(ptr %p, ptr %q) {
598589; CHECK-NEXT: .cfi_def_cfa_offset 32
599590; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
600591; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
592+ ; CHECK-NEXT: fsd fs0, 8(sp) # 8-byte Folded Spill
593+ ; CHECK-NEXT: fsd fs1, 0(sp) # 8-byte Folded Spill
601594; CHECK-NEXT: .cfi_offset ra, -8
602595; CHECK-NEXT: .cfi_offset s0, -16
603- ; CHECK-NEXT: csrr a2, vlenb
604- ; CHECK-NEXT: sub sp, sp, a2
605- ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 1 * vlenb
606- ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
607- ; CHECK-NEXT: vle32.v v8, (a0)
608- ; CHECK-NEXT: addi a0, sp, 16
609- ; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
596+ ; CHECK-NEXT: .cfi_offset fs0, -24
597+ ; CHECK-NEXT: .cfi_offset fs1, -32
598+ ; CHECK-NEXT: flw fs0, 0(a0)
599+ ; CHECK-NEXT: flw fs1, 4(a0)
610600; CHECK-NEXT: mv s0, a1
611601; CHECK-NEXT: call g
612- ; CHECK-NEXT: addi a0, sp, 16
613- ; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
614- ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
615- ; CHECK-NEXT: vse32.v v8, (s0)
616- ; CHECK-NEXT: csrr a0, vlenb
617- ; CHECK-NEXT: add sp, sp, a0
618- ; CHECK-NEXT: .cfi_def_cfa sp, 32
602+ ; CHECK-NEXT: fsw fs0, 0(s0)
603+ ; CHECK-NEXT: fsw fs1, 4(s0)
619604; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
620605; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
606+ ; CHECK-NEXT: fld fs0, 8(sp) # 8-byte Folded Reload
607+ ; CHECK-NEXT: fld fs1, 0(sp) # 8-byte Folded Reload
621608; CHECK-NEXT: .cfi_restore ra
622609; CHECK-NEXT: .cfi_restore s0
610+ ; CHECK-NEXT: .cfi_restore fs0
611+ ; CHECK-NEXT: .cfi_restore fs1
623612; CHECK-NEXT: addi sp, sp, 32
624613; CHECK-NEXT: .cfi_def_cfa_offset 0
625614; CHECK-NEXT: ret
@@ -679,28 +668,26 @@ define void @two_double(ptr %p, ptr %q) {
679668; CHECK-NEXT: .cfi_def_cfa_offset 32
680669; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
681670; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
671+ ; CHECK-NEXT: fsd fs0, 8(sp) # 8-byte Folded Spill
672+ ; CHECK-NEXT: fsd fs1, 0(sp) # 8-byte Folded Spill
682673; CHECK-NEXT: .cfi_offset ra, -8
683674; CHECK-NEXT: .cfi_offset s0, -16
684- ; CHECK-NEXT: csrr a2, vlenb
685- ; CHECK-NEXT: sub sp, sp, a2
686- ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 1 * vlenb
687- ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
688- ; CHECK-NEXT: vle64.v v8, (a0)
689- ; CHECK-NEXT: addi a0, sp, 16
690- ; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
675+ ; CHECK-NEXT: .cfi_offset fs0, -24
676+ ; CHECK-NEXT: .cfi_offset fs1, -32
677+ ; CHECK-NEXT: fld fs0, 0(a0)
678+ ; CHECK-NEXT: fld fs1, 8(a0)
691679; CHECK-NEXT: mv s0, a1
692680; CHECK-NEXT: call g
693- ; CHECK-NEXT: addi a0, sp, 16
694- ; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
695- ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
696- ; CHECK-NEXT: vse64.v v8, (s0)
697- ; CHECK-NEXT: csrr a0, vlenb
698- ; CHECK-NEXT: add sp, sp, a0
699- ; CHECK-NEXT: .cfi_def_cfa sp, 32
681+ ; CHECK-NEXT: fsd fs0, 0(s0)
682+ ; CHECK-NEXT: fsd fs1, 8(s0)
700683; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
701684; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
685+ ; CHECK-NEXT: fld fs0, 8(sp) # 8-byte Folded Reload
686+ ; CHECK-NEXT: fld fs1, 0(sp) # 8-byte Folded Reload
702687; CHECK-NEXT: .cfi_restore ra
703688; CHECK-NEXT: .cfi_restore s0
689+ ; CHECK-NEXT: .cfi_restore fs0
690+ ; CHECK-NEXT: .cfi_restore fs1
704691; CHECK-NEXT: addi sp, sp, 32
705692; CHECK-NEXT: .cfi_def_cfa_offset 0
706693; CHECK-NEXT: ret
0 commit comments