11; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2- ; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s --check-prefixes=CHECK,V
3- ; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh | FileCheck %s --check-prefixes=CHECK,ZVFH
2+ ; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+b | FileCheck %s --check-prefixes=CHECK,V
3+ ; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+b,+zvfh | FileCheck %s --check-prefixes=CHECK,ZVFH
44
55declare i32 @llvm.experimental.constrained.fptosi.i32.f64 (double , metadata )
66declare void @g ()
@@ -135,6 +135,48 @@ define void @i8_i16(ptr %p, ptr %q) {
135135 ret void
136136}
137137
; Loads two adjacent i8 values from %p (offsets 0 and 1), calls @g, then
; stores them to %q in swapped order (%x1 to offset 0, %x0 to offset 1).
; The first load and the first store carry `align 2`. The expected output
; keeps two separate lbu/sb pairs rather than one 16-bit access.
138+ define void @i8_i16_rotate (ptr %p , ptr %q ) {
139+ ; CHECK-LABEL: i8_i16_rotate:
140+ ; CHECK: # %bb.0:
141+ ; CHECK-NEXT: addi sp, sp, -32
142+ ; CHECK-NEXT: .cfi_def_cfa_offset 32
143+ ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
144+ ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
145+ ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
146+ ; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
147+ ; CHECK-NEXT: .cfi_offset ra, -8
148+ ; CHECK-NEXT: .cfi_offset s0, -16
149+ ; CHECK-NEXT: .cfi_offset s1, -24
150+ ; CHECK-NEXT: .cfi_offset s2, -32
151+ ; CHECK-NEXT: lbu s1, 0(a0)
152+ ; CHECK-NEXT: lbu s2, 1(a0)
153+ ; CHECK-NEXT: mv s0, a1
154+ ; CHECK-NEXT: call g
155+ ; CHECK-NEXT: sb s2, 0(s0)
156+ ; CHECK-NEXT: sb s1, 1(s0)
157+ ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
158+ ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
159+ ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
160+ ; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
161+ ; CHECK-NEXT: .cfi_restore ra
162+ ; CHECK-NEXT: .cfi_restore s0
163+ ; CHECK-NEXT: .cfi_restore s1
164+ ; CHECK-NEXT: .cfi_restore s2
165+ ; CHECK-NEXT: addi sp, sp, 32
166+ ; CHECK-NEXT: .cfi_def_cfa_offset 0
167+ ; CHECK-NEXT: ret
168+ %p0 = getelementptr i8 , ptr %p , i64 0
169+ %p1 = getelementptr i8 , ptr %p , i64 1
170+ %x0 = load i8 , ptr %p0 , align 2
171+ %x1 = load i8 , ptr %p1
; Both loaded values are live across the call to @g.
172+ call void @g ()
173+ %q0 = getelementptr i8 , ptr %q , i64 0
174+ %q1 = getelementptr i8 , ptr %q , i64 1
; Swapped relative to the load order: %x1 goes to offset 0, %x0 to offset 1.
175+ store i8 %x1 , ptr %q0 , align 2
176+ store i8 %x0 , ptr %q1
177+ ret void
178+ }
179+
138180; We could reorder the first call and the load here to enable
139181; merging, but don't currently do so.
140182define void @i8_i16_resched_readnone_ld (ptr %p , ptr %q ) {
@@ -228,6 +270,78 @@ define void @i8_i16_resched_readnone_st(ptr %p, ptr %q) {
228270 ret void
229271}
230272
; Loads two adjacent i32 values from %p (offsets 0 and 4), calls @g, then
; stores them to %q at the same offsets and in the same order. The first load
; and the first store carry `align 8`. The expected output uses a single ld
; before the call and a single sd after it.
273+ define void @i32_i64 (ptr %p , ptr %q ) {
274+ ; CHECK-LABEL: i32_i64:
275+ ; CHECK: # %bb.0:
276+ ; CHECK-NEXT: addi sp, sp, -32
277+ ; CHECK-NEXT: .cfi_def_cfa_offset 32
278+ ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
279+ ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
280+ ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
281+ ; CHECK-NEXT: .cfi_offset ra, -8
282+ ; CHECK-NEXT: .cfi_offset s0, -16
283+ ; CHECK-NEXT: .cfi_offset s1, -24
284+ ; CHECK-NEXT: ld s1, 0(a0)
285+ ; CHECK-NEXT: mv s0, a1
286+ ; CHECK-NEXT: call g
287+ ; CHECK-NEXT: sd s1, 0(s0)
288+ ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
289+ ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
290+ ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
291+ ; CHECK-NEXT: .cfi_restore ra
292+ ; CHECK-NEXT: .cfi_restore s0
293+ ; CHECK-NEXT: .cfi_restore s1
294+ ; CHECK-NEXT: addi sp, sp, 32
295+ ; CHECK-NEXT: .cfi_def_cfa_offset 0
296+ ; CHECK-NEXT: ret
297+ %p0 = getelementptr i8 , ptr %p , i64 0
298+ %p1 = getelementptr i8 , ptr %p , i64 4
299+ %x0 = load i32 , ptr %p0 , align 8
300+ %x1 = load i32 , ptr %p1
; Both loaded values are live across the call to @g.
301+ call void @g ()
302+ %q0 = getelementptr i8 , ptr %q , i64 0
303+ %q1 = getelementptr i8 , ptr %q , i64 4
304+ store i32 %x0 , ptr %q0 , align 8
305+ store i32 %x1 , ptr %q1
306+ ret void
307+ }
308+
; Loads two adjacent i32 values from %p (offsets 0 and 4), calls @g, then
; stores them to %q in swapped order (%x1 to offset 0, %x0 to offset 4).
; The first load and the first store carry `align 8`. The expected output is a
; single ld, a rori by 32 (which exchanges the two 32-bit halves of the 64-bit
; register), and a single sd.
309+ define void @i32_i64_rotate (ptr %p , ptr %q ) {
310+ ; CHECK-LABEL: i32_i64_rotate:
311+ ; CHECK: # %bb.0:
312+ ; CHECK-NEXT: addi sp, sp, -32
313+ ; CHECK-NEXT: .cfi_def_cfa_offset 32
314+ ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
315+ ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
316+ ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
317+ ; CHECK-NEXT: .cfi_offset ra, -8
318+ ; CHECK-NEXT: .cfi_offset s0, -16
319+ ; CHECK-NEXT: .cfi_offset s1, -24
320+ ; CHECK-NEXT: mv s0, a1
321+ ; CHECK-NEXT: ld a0, 0(a0)
322+ ; CHECK-NEXT: rori s1, a0, 32
323+ ; CHECK-NEXT: call g
324+ ; CHECK-NEXT: sd s1, 0(s0)
325+ ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
326+ ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
327+ ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
328+ ; CHECK-NEXT: .cfi_restore ra
329+ ; CHECK-NEXT: .cfi_restore s0
330+ ; CHECK-NEXT: .cfi_restore s1
331+ ; CHECK-NEXT: addi sp, sp, 32
332+ ; CHECK-NEXT: .cfi_def_cfa_offset 0
333+ ; CHECK-NEXT: ret
334+ %p0 = getelementptr i8 , ptr %p , i64 0
335+ %p1 = getelementptr i8 , ptr %p , i64 4
336+ %x0 = load i32 , ptr %p0 , align 8
337+ %x1 = load i32 , ptr %p1
; Both loaded values are live across the call to @g.
338+ call void @g ()
339+ %q0 = getelementptr i8 , ptr %q , i64 0
340+ %q1 = getelementptr i8 , ptr %q , i64 4
; Swapped relative to the load order: %x1 goes to offset 0, %x0 to offset 4.
341+ store i32 %x1 , ptr %q0 , align 8
342+ store i32 %x0 , ptr %q1
343+ ret void
344+ }
231345
232346; Merging vectors is profitable, it reduces pressure within a single
233347; register class.
@@ -305,8 +419,7 @@ define void @v16i8_v32i8(ptr %p, ptr %q) {
305419; CHECK-NEXT: vsetvli zero, s1, e8, m2, ta, ma
306420; CHECK-NEXT: vse8.v v8, (s0)
307421; CHECK-NEXT: csrr a0, vlenb
308- ; CHECK-NEXT: slli a0, a0, 1
309- ; CHECK-NEXT: add sp, sp, a0
422+ ; CHECK-NEXT: sh1add sp, a0, sp
310423; CHECK-NEXT: .cfi_def_cfa sp, 64
311424; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
312425; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
@@ -329,9 +442,44 @@ define void @v16i8_v32i8(ptr %p, ptr %q) {
329442 ret void
330443}
331444
332- ; TODO: We fail to merge these, which would be profitable.
; Loads two adjacent half values from %p (offsets 0 and 2), calls @g, then
; stores them to %q at the same offsets and in the same order. The first load
; and the first store carry `align 4`. The expected output is shared by both
; RUN configurations and uses a single lw before the call and a single sw
; after it.
333445define void @two_half (ptr %p , ptr %q ) {
334- ; V-LABEL: two_half:
446+ ; CHECK-LABEL: two_half:
447+ ; CHECK: # %bb.0:
448+ ; CHECK-NEXT: addi sp, sp, -32
449+ ; CHECK-NEXT: .cfi_def_cfa_offset 32
450+ ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
451+ ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
452+ ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
453+ ; CHECK-NEXT: .cfi_offset ra, -8
454+ ; CHECK-NEXT: .cfi_offset s0, -16
455+ ; CHECK-NEXT: .cfi_offset s1, -24
456+ ; CHECK-NEXT: lw s1, 0(a0)
457+ ; CHECK-NEXT: mv s0, a1
458+ ; CHECK-NEXT: call g
459+ ; CHECK-NEXT: sw s1, 0(s0)
460+ ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
461+ ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
462+ ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
463+ ; CHECK-NEXT: .cfi_restore ra
464+ ; CHECK-NEXT: .cfi_restore s0
465+ ; CHECK-NEXT: .cfi_restore s1
466+ ; CHECK-NEXT: addi sp, sp, 32
467+ ; CHECK-NEXT: .cfi_def_cfa_offset 0
468+ ; CHECK-NEXT: ret
469+ %p0 = getelementptr i8 , ptr %p , i64 0
470+ %p1 = getelementptr i8 , ptr %p , i64 2
471+ %x0 = load half , ptr %p0 , align 4
472+ %x1 = load half , ptr %p1
; Both loaded values are live across the call to @g.
473+ call void @g ()
474+ %q0 = getelementptr i8 , ptr %q , i64 0
475+ %q1 = getelementptr i8 , ptr %q , i64 2
476+ store half %x0 , ptr %q0 , align 4
477+ store half %x1 , ptr %q1
478+ ret void
479+ }
480+
481+ define void @two_half_unaligned (ptr %p , ptr %q ) {
482+ ; V-LABEL: two_half_unaligned:
335483; V: # %bb.0:
336484; V-NEXT: addi sp, sp, -32
337485; V-NEXT: .cfi_def_cfa_offset 32
@@ -361,7 +509,7 @@ define void @two_half(ptr %p, ptr %q) {
361509; V-NEXT: .cfi_def_cfa_offset 0
362510; V-NEXT: ret
363511;
364- ; ZVFH-LABEL: two_half :
512+ ; ZVFH-LABEL: two_half_unaligned :
365513; ZVFH: # %bb.0:
366514; ZVFH-NEXT: addi sp, sp, -32
367515; ZVFH-NEXT: .cfi_def_cfa_offset 32
@@ -404,6 +552,7 @@ define void @two_half(ptr %p, ptr %q) {
404552 ret void
405553}
406554
555+
407556; With 8-byte alignment on the first load and store, this pair of floats is
408557; now merged into a single i64 load/store rather than a vector.
409558define void @two_float (ptr %p , ptr %q ) {
@@ -413,6 +562,42 @@ define void @two_float(ptr %p, ptr %q) {
413562; CHECK-NEXT: .cfi_def_cfa_offset 32
414563; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
415564; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
565+ ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
566+ ; CHECK-NEXT: .cfi_offset ra, -8
567+ ; CHECK-NEXT: .cfi_offset s0, -16
568+ ; CHECK-NEXT: .cfi_offset s1, -24
569+ ; CHECK-NEXT: ld s1, 0(a0)
570+ ; CHECK-NEXT: mv s0, a1
571+ ; CHECK-NEXT: call g
572+ ; CHECK-NEXT: sd s1, 0(s0)
573+ ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
574+ ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
575+ ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
576+ ; CHECK-NEXT: .cfi_restore ra
577+ ; CHECK-NEXT: .cfi_restore s0
578+ ; CHECK-NEXT: .cfi_restore s1
579+ ; CHECK-NEXT: addi sp, sp, 32
580+ ; CHECK-NEXT: .cfi_def_cfa_offset 0
581+ ; CHECK-NEXT: ret
582+ %p0 = getelementptr i8 , ptr %p , i64 0
583+ %p1 = getelementptr i8 , ptr %p , i64 4
584+ %x0 = load float , ptr %p0 , align 8
585+ %x1 = load float , ptr %p1
586+ call void @g ()
587+ %q0 = getelementptr i8 , ptr %q , i64 0
588+ %q1 = getelementptr i8 , ptr %q , i64 4
589+ store float %x0 , ptr %q0 , align 8
590+ store float %x1 , ptr %q1
591+ ret void
592+ }
593+
594+ define void @two_float_unaligned (ptr %p , ptr %q ) {
595+ ; CHECK-LABEL: two_float_unaligned:
596+ ; CHECK: # %bb.0:
597+ ; CHECK-NEXT: addi sp, sp, -32
598+ ; CHECK-NEXT: .cfi_def_cfa_offset 32
599+ ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
600+ ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
416601; CHECK-NEXT: .cfi_offset ra, -8
417602; CHECK-NEXT: .cfi_offset s0, -16
418603; CHECK-NEXT: csrr a2, vlenb
@@ -450,6 +635,43 @@ define void @two_float(ptr %p, ptr %q) {
450635 ret void
451636}
452637
; Loads two adjacent float values from %p (offsets 0 and 4), calls @g, then
; stores them to %q in swapped order (%x1 to offset 0, %x0 to offset 4).
; The first load and the first store carry `align 8`. The expected output is a
; single ld, a rori by 32 (which exchanges the two 32-bit halves of the 64-bit
; register), and a single sd.
638+ define void @two_float_rotate (ptr %p , ptr %q ) {
639+ ; CHECK-LABEL: two_float_rotate:
640+ ; CHECK: # %bb.0:
641+ ; CHECK-NEXT: addi sp, sp, -32
642+ ; CHECK-NEXT: .cfi_def_cfa_offset 32
643+ ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
644+ ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
645+ ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
646+ ; CHECK-NEXT: .cfi_offset ra, -8
647+ ; CHECK-NEXT: .cfi_offset s0, -16
648+ ; CHECK-NEXT: .cfi_offset s1, -24
649+ ; CHECK-NEXT: mv s0, a1
650+ ; CHECK-NEXT: ld a0, 0(a0)
651+ ; CHECK-NEXT: rori s1, a0, 32
652+ ; CHECK-NEXT: call g
653+ ; CHECK-NEXT: sd s1, 0(s0)
654+ ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
655+ ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
656+ ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
657+ ; CHECK-NEXT: .cfi_restore ra
658+ ; CHECK-NEXT: .cfi_restore s0
659+ ; CHECK-NEXT: .cfi_restore s1
660+ ; CHECK-NEXT: addi sp, sp, 32
661+ ; CHECK-NEXT: .cfi_def_cfa_offset 0
662+ ; CHECK-NEXT: ret
663+ %p0 = getelementptr i8 , ptr %p , i64 0
664+ %p1 = getelementptr i8 , ptr %p , i64 4
665+ %x0 = load float , ptr %p0 , align 8
666+ %x1 = load float , ptr %p1
; Both loaded values are live across the call to @g.
667+ call void @g ()
668+ %q0 = getelementptr i8 , ptr %q , i64 0
669+ %q1 = getelementptr i8 , ptr %q , i64 4
; Swapped relative to the load order: %x1 goes to offset 0, %x0 to offset 4.
670+ store float %x1 , ptr %q0 , align 8
671+ store float %x0 , ptr %q1
672+ ret void
673+ }
674+
453675define void @two_double (ptr %p , ptr %q ) {
454676; CHECK-LABEL: two_double:
455677; CHECK: # %bb.0:
0 commit comments