LoongArch: Add test for sincos intrinsic #147471
Conversation
This stack of pull requests is managed by Graphite.
@llvm/pr-subscribers-backend-loongarch

Author: Matt Arsenault (arsenm)

Changes: Patch is 35.05 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/147471.diff

1 file affected:
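For context (this sketch is not part of the patch): the llvm.sincos intrinsic computes both the sine and the cosine of its operand and returns them as a two-element struct, which is what the tests below exercise. A minimal IR example, assuming an f64 overload analogous to the f16/f32 overloads shown in the diff:

  declare { double, double } @llvm.sincos.f64(double)

  define { double, double } @sincos_example(double %x) {
    ; one call yields both results; backends without a native sincos
    ; lowering typically expand this into separate sin/cos libcalls,
    ; as the LoongArch output below demonstrates
    %res = call { double, double } @llvm.sincos.f64(double %x)
    ret { double, double } %res
  }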
diff --git a/llvm/test/CodeGen/LoongArch/llvm.sincos.ll b/llvm/test/CodeGen/LoongArch/llvm.sincos.ll
new file mode 100644
index 0000000000000..ffedd7f9e9438
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/llvm.sincos.ll
@@ -0,0 +1,866 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64
+
+define { half, half } @test_sincos_f16(half %a) #0 {
+; LA32-LABEL: test_sincos_f16:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -32
+; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill
+; LA32-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill
+; LA32-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT: bl __extendhfsf2
+; LA32-NEXT: fmov.s $fs0, $fa0
+; LA32-NEXT: bl cosf
+; LA32-NEXT: bl __truncsfhf2
+; LA32-NEXT: fmov.s $fs1, $fa0
+; LA32-NEXT: fmov.s $fa0, $fs0
+; LA32-NEXT: bl sinf
+; LA32-NEXT: movfr2gr.s $fp, $fs1
+; LA32-NEXT: bl __truncsfhf2
+; LA32-NEXT: movfr2gr.s $a0, $fa0
+; LA32-NEXT: lu12i.w $a1, -16
+; LA32-NEXT: or $a0, $a0, $a1
+; LA32-NEXT: movgr2fr.w $fa0, $a0
+; LA32-NEXT: or $a0, $fp, $a1
+; LA32-NEXT: movgr2fr.w $fa1, $a0
+; LA32-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload
+; LA32-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload
+; LA32-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 32
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_sincos_f16:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -32
+; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs1, $sp, 0 # 8-byte Folded Spill
+; LA64-NEXT: pcaddu18i $ra, %call36(__extendhfsf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: fmov.s $fs0, $fa0
+; LA64-NEXT: pcaddu18i $ra, %call36(cosf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: pcaddu18i $ra, %call36(__truncsfhf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: fmov.s $fs1, $fa0
+; LA64-NEXT: fmov.s $fa0, $fs0
+; LA64-NEXT: pcaddu18i $ra, %call36(sinf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: movfr2gr.s $fp, $fs1
+; LA64-NEXT: pcaddu18i $ra, %call36(__truncsfhf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: movfr2gr.s $a0, $fa0
+; LA64-NEXT: lu12i.w $a1, -16
+; LA64-NEXT: or $a0, $a0, $a1
+; LA64-NEXT: movgr2fr.w $fa0, $a0
+; LA64-NEXT: or $a0, $fp, $a1
+; LA64-NEXT: movgr2fr.w $fa1, $a0
+; LA64-NEXT: fld.d $fs1, $sp, 0 # 8-byte Folded Reload
+; LA64-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 32
+; LA64-NEXT: ret
+ %result = call { half, half } @llvm.sincos.f16(half %a)
+ ret { half, half } %result
+}
+
+define half @test_sincos_f16_only_use_sin(half %a) #0 {
+; LA32-LABEL: test_sincos_f16_only_use_sin:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: bl __extendhfsf2
+; LA32-NEXT: bl sinf
+; LA32-NEXT: bl __truncsfhf2
+; LA32-NEXT: movfr2gr.s $a0, $fa0
+; LA32-NEXT: lu12i.w $a1, -16
+; LA32-NEXT: or $a0, $a0, $a1
+; LA32-NEXT: movgr2fr.w $fa0, $a0
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_sincos_f16_only_use_sin:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: pcaddu18i $ra, %call36(__extendhfsf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: pcaddu18i $ra, %call36(sinf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: pcaddu18i $ra, %call36(__truncsfhf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: movfr2gr.s $a0, $fa0
+; LA64-NEXT: lu12i.w $a1, -16
+; LA64-NEXT: or $a0, $a0, $a1
+; LA64-NEXT: movgr2fr.w $fa0, $a0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+ %result = call { half, half } @llvm.sincos.f16(half %a)
+ %result.0 = extractvalue { half, half } %result, 0
+ ret half %result.0
+}
+
+define half @test_sincos_f16_only_use_cos(half %a) #0 {
+; LA32-LABEL: test_sincos_f16_only_use_cos:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: bl __extendhfsf2
+; LA32-NEXT: bl cosf
+; LA32-NEXT: bl __truncsfhf2
+; LA32-NEXT: movfr2gr.s $a0, $fa0
+; LA32-NEXT: lu12i.w $a1, -16
+; LA32-NEXT: or $a0, $a0, $a1
+; LA32-NEXT: movgr2fr.w $fa0, $a0
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_sincos_f16_only_use_cos:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: pcaddu18i $ra, %call36(__extendhfsf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: pcaddu18i $ra, %call36(cosf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: pcaddu18i $ra, %call36(__truncsfhf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: movfr2gr.s $a0, $fa0
+; LA64-NEXT: lu12i.w $a1, -16
+; LA64-NEXT: or $a0, $a0, $a1
+; LA64-NEXT: movgr2fr.w $fa0, $a0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+ %result = call { half, half } @llvm.sincos.f16(half %a)
+ %result.1 = extractvalue { half, half } %result, 1
+ ret half %result.1
+}
+
+define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) #0 {
+; LA32-LABEL: test_sincos_v2f16:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -64
+; LA32-NEXT: st.w $ra, $sp, 60 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 56 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s0, $sp, 52 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s1, $sp, 48 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s2, $sp, 44 # 4-byte Folded Spill
+; LA32-NEXT: fst.d $fs0, $sp, 32 # 8-byte Folded Spill
+; LA32-NEXT: fst.d $fs1, $sp, 24 # 8-byte Folded Spill
+; LA32-NEXT: fst.d $fs2, $sp, 16 # 8-byte Folded Spill
+; LA32-NEXT: fst.d $fs3, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT: move $fp, $a0
+; LA32-NEXT: movgr2fr.w $fs0, $a2
+; LA32-NEXT: movgr2fr.w $fa0, $a1
+; LA32-NEXT: bl __extendhfsf2
+; LA32-NEXT: fmov.s $fs1, $fa0
+; LA32-NEXT: bl cosf
+; LA32-NEXT: bl __truncsfhf2
+; LA32-NEXT: fmov.s $fs2, $fa0
+; LA32-NEXT: fmov.s $fa0, $fs0
+; LA32-NEXT: bl __extendhfsf2
+; LA32-NEXT: fmov.s $fs0, $fa0
+; LA32-NEXT: bl sinf
+; LA32-NEXT: bl __truncsfhf2
+; LA32-NEXT: fmov.s $fs3, $fa0
+; LA32-NEXT: fmov.s $fa0, $fs1
+; LA32-NEXT: bl sinf
+; LA32-NEXT: bl __truncsfhf2
+; LA32-NEXT: fmov.s $fs1, $fa0
+; LA32-NEXT: fmov.s $fa0, $fs0
+; LA32-NEXT: bl cosf
+; LA32-NEXT: movfr2gr.s $s0, $fs1
+; LA32-NEXT: movfr2gr.s $s1, $fs3
+; LA32-NEXT: movfr2gr.s $s2, $fs2
+; LA32-NEXT: bl __truncsfhf2
+; LA32-NEXT: movfr2gr.s $a0, $fa0
+; LA32-NEXT: st.h $a0, $fp, 6
+; LA32-NEXT: st.h $s2, $fp, 4
+; LA32-NEXT: st.h $s1, $fp, 2
+; LA32-NEXT: st.h $s0, $fp, 0
+; LA32-NEXT: fld.d $fs3, $sp, 8 # 8-byte Folded Reload
+; LA32-NEXT: fld.d $fs2, $sp, 16 # 8-byte Folded Reload
+; LA32-NEXT: fld.d $fs1, $sp, 24 # 8-byte Folded Reload
+; LA32-NEXT: fld.d $fs0, $sp, 32 # 8-byte Folded Reload
+; LA32-NEXT: ld.w $s2, $sp, 44 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s1, $sp, 48 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s0, $sp, 52 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $fp, $sp, 56 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 60 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 64
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_sincos_v2f16:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -80
+; LA64-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs0, $sp, 32 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs1, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs2, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs3, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: move $s0, $a2
+; LA64-NEXT: move $fp, $a0
+; LA64-NEXT: movgr2fr.w $fa0, $a1
+; LA64-NEXT: pcaddu18i $ra, %call36(__extendhfsf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: fmov.s $fs0, $fa0
+; LA64-NEXT: pcaddu18i $ra, %call36(cosf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: pcaddu18i $ra, %call36(__truncsfhf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: fmov.s $fs1, $fa0
+; LA64-NEXT: movgr2fr.w $fa0, $s0
+; LA64-NEXT: pcaddu18i $ra, %call36(__extendhfsf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: fmov.s $fs2, $fa0
+; LA64-NEXT: pcaddu18i $ra, %call36(sinf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: pcaddu18i $ra, %call36(__truncsfhf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: fmov.s $fs3, $fa0
+; LA64-NEXT: fmov.s $fa0, $fs0
+; LA64-NEXT: pcaddu18i $ra, %call36(sinf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: pcaddu18i $ra, %call36(__truncsfhf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: fmov.s $fs0, $fa0
+; LA64-NEXT: fmov.s $fa0, $fs2
+; LA64-NEXT: pcaddu18i $ra, %call36(cosf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: movfr2gr.s $s0, $fs0
+; LA64-NEXT: movfr2gr.s $s1, $fs3
+; LA64-NEXT: movfr2gr.s $s2, $fs1
+; LA64-NEXT: pcaddu18i $ra, %call36(__truncsfhf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: movfr2gr.s $a0, $fa0
+; LA64-NEXT: st.h $a0, $fp, 6
+; LA64-NEXT: st.h $s2, $fp, 4
+; LA64-NEXT: st.h $s1, $fp, 2
+; LA64-NEXT: st.h $s0, $fp, 0
+; LA64-NEXT: fld.d $fs3, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: fld.d $fs2, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT: fld.d $fs1, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT: fld.d $fs0, $sp, 32 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 80
+; LA64-NEXT: ret
+ %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a)
+ ret { <2 x half>, <2 x half> } %result
+}
+
+define { float, float } @test_sincos_f32(float %a) #0 {
+; LA32-LABEL: test_sincos_f32:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -32
+; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill
+; LA32-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT: fmov.s $fs0, $fa0
+; LA32-NEXT: bl sinf
+; LA32-NEXT: fmov.s $fs1, $fa0
+; LA32-NEXT: fmov.s $fa0, $fs0
+; LA32-NEXT: bl cosf
+; LA32-NEXT: fmov.s $fa1, $fa0
+; LA32-NEXT: fmov.s $fa0, $fs1
+; LA32-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload
+; LA32-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 32
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_sincos_f32:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -32
+; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: fmov.s $fs0, $fa0
+; LA64-NEXT: pcaddu18i $ra, %call36(sinf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: fmov.s $fs1, $fa0
+; LA64-NEXT: fmov.s $fa0, $fs0
+; LA64-NEXT: pcaddu18i $ra, %call36(cosf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: fmov.s $fa1, $fa0
+; LA64-NEXT: fmov.s $fa0, $fs1
+; LA64-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 32
+; LA64-NEXT: ret
+ %result = call { float, float } @llvm.sincos.f32(float %a)
+ ret { float, float } %result
+}
+
+define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) #0 {
+; LA32-LABEL: test_sincos_v2f32:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -48
+; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill
+; LA32-NEXT: fst.d $fs0, $sp, 32 # 8-byte Folded Spill
+; LA32-NEXT: fst.d $fs1, $sp, 24 # 8-byte Folded Spill
+; LA32-NEXT: fst.d $fs2, $sp, 16 # 8-byte Folded Spill
+; LA32-NEXT: fst.d $fs3, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT: fmov.s $fs0, $fa1
+; LA32-NEXT: fmov.s $fs1, $fa0
+; LA32-NEXT: move $fp, $a0
+; LA32-NEXT: bl sinf
+; LA32-NEXT: fmov.s $fs2, $fa0
+; LA32-NEXT: fmov.s $fa0, $fs0
+; LA32-NEXT: bl sinf
+; LA32-NEXT: fmov.s $fs3, $fa0
+; LA32-NEXT: fmov.s $fa0, $fs1
+; LA32-NEXT: bl cosf
+; LA32-NEXT: fmov.s $fs1, $fa0
+; LA32-NEXT: fmov.s $fa0, $fs0
+; LA32-NEXT: bl cosf
+; LA32-NEXT: fst.s $fa0, $fp, 12
+; LA32-NEXT: fst.s $fs1, $fp, 8
+; LA32-NEXT: fst.s $fs3, $fp, 4
+; LA32-NEXT: fst.s $fs2, $fp, 0
+; LA32-NEXT: fld.d $fs3, $sp, 8 # 8-byte Folded Reload
+; LA32-NEXT: fld.d $fs2, $sp, 16 # 8-byte Folded Reload
+; LA32-NEXT: fld.d $fs1, $sp, 24 # 8-byte Folded Reload
+; LA32-NEXT: fld.d $fs0, $sp, 32 # 8-byte Folded Reload
+; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 48
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_sincos_v2f32:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -64
+; LA64-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
+; LA64-NEXT: vreplvei.w $vr0, $vr0, 0
+; LA64-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
+; LA64-NEXT: pcaddu18i $ra, %call36(sinf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
+; LA64-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
+; LA64-NEXT: vreplvei.w $vr0, $vr0, 1
+; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
+; LA64-NEXT: pcaddu18i $ra, %call36(sinf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
+; LA64-NEXT: vpackev.w $vr0, $vr0, $vr1
+; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
+; LA64-NEXT: vld $vr0, $sp, 32 # 16-byte Folded Reload
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
+; LA64-NEXT: pcaddu18i $ra, %call36(cosf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill
+; LA64-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
+; LA64-NEXT: pcaddu18i $ra, %call36(cosf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT: vld $vr1, $sp, 32 # 16-byte Folded Reload
+; LA64-NEXT: vpackev.w $vr1, $vr0, $vr1
+; LA64-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 64
+; LA64-NEXT: ret
+ %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a)
+ ret { <2 x float>, <2 x float> } %result
+}
+
+define { <3 x float>, <3 x float> } @test_sincos_v3f32(<3 x float> %a) #0 {
+; LA32-LABEL: test_sincos_v3f32:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -64
+; LA32-NEXT: st.w $ra, $sp, 60 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 56 # 4-byte Folded Spill
+; LA32-NEXT: fst.d $fs0, $sp, 48 # 8-byte Folded Spill
+; LA32-NEXT: fst.d $fs1, $sp, 40 # 8-byte Folded Spill
+; LA32-NEXT: fst.d $fs2, $sp, 32 # 8-byte Folded Spill
+; LA32-NEXT: fst.d $fs3, $sp, 24 # 8-byte Folded Spill
+; LA32-NEXT: fst.d $fs4, $sp, 16 # 8-byte Folded Spill
+; LA32-NEXT: fst.d $fs5, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT: fmov.s $fs0, $fa2
+; LA32-NEXT: fmov.s $fs1, $fa1
+; LA32-NEXT: fmov.s $fs2, $fa0
+; LA32-NEXT: move $fp, $a0
+; LA32-NEXT: bl sinf
+; LA32-NEXT: fmov.s $fs3, $fa0
+; LA32-NEXT: fmov.s $fa0, $fs1
+; LA32-NEXT: bl sinf
+; LA32-NEXT: fmov.s $fs4, $fa0
+; LA32-NEXT: fmov.s $fa0, $fs0
+; LA32-NEXT: bl sinf
+; LA32-NEXT: fmov.s $fs5, $fa0
+; LA32-NEXT: fmov.s $fa0, $fs2
+; LA32-NEXT: bl cosf
+; LA32-NEXT: fmov.s $fs2, $fa0
+; LA32-NEXT: fmov.s $fa0, $fs1
+; LA32-NEXT: bl cosf
+; LA32-NEXT: fmov.s $fs1, $fa0
+; LA32-NEXT: fmov.s $fa0, $fs0
+; LA32-NEXT: bl cosf
+; LA32-NEXT: fst.s $fa0, $fp, 24
+; LA32-NEXT: fst.s $fs1, $fp, 20
+; LA32-NEXT: fst.s $fs2, $fp, 16
+; LA32-NEXT: fst.s $fs5, $fp, 8
+; LA32-NEXT: fst.s $fs4, $fp, 4
+; LA32-NEXT: fst.s $fs3, $fp, 0
+; LA32-NEXT: fld.d $fs5, $sp, 8 # 8-byte Folded Reload
+; LA32-NEXT: fld.d $fs4, $sp, 16 # 8-byte Folded Reload
+; LA32-NEXT: fld.d $fs3, $sp, 24 # 8-byte Folded Reload
+; LA32-NEXT: fld.d $fs2, $sp, 32 # 8-byte Folded Reload
+; LA32-NEXT: fld.d $fs1, $sp, 40 # 8-byte Folded Reload
+; LA32-NEXT: fld.d $fs0, $sp, 48 # 8-byte Folded Reload
+; LA32-NEXT: ld.w $fp, $sp, 56 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 60 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 64
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_sincos_v3f32:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -96
+; LA64-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
+; LA64-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill
+; LA64-NEXT: vreplvei.w $vr0, $vr0, 2
+; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
+; LA64-NEXT: pcaddu18i $ra, %call36(sinf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: fst.s $fa0, $sp, 72
+; LA64-NEXT: vld $vr0, $sp, 32 # 16-byte Folded Reload
+; LA64-NEXT: vreplvei.w $vr0, $vr0, 1
+; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
+; LA64-NEXT: pcaddu18i $ra, %call36(sinf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: fst.s $fa0, $sp, 68
+; LA64-NEXT: vld $vr0, $sp, 32 # 16-byte Folded Reload
+; LA64-NEXT: vreplvei.w $vr0, $vr0, 0
+; LA64-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
+; LA64-NEXT: pcaddu18i $ra, %call36(sinf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: fst.s $fa0, $sp, 64
+; LA64-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
+; LA64-NEXT: pcaddu18i $ra, %call36(cosf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: fst.s $fa0, $sp, 56
+; LA64-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
+; LA64-NEXT: pcaddu18i $ra, %call36(cosf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: fst.s $fa0, $sp, 52
+; LA64-NEXT: vld $vr0, $sp, 32 # 16-byte Folded Reload
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
+; LA64-NEXT: pcaddu18i $ra, %call36(cosf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: fst.s $fa0, $sp, 48
+; LA64-NEXT: vld $vr0, $sp, 64
+; LA64-NEXT: vld $vr1, $sp, 48
+; LA64-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 96
+; LA64-NEXT: ret
+ %result = call { <3 x float>, <3 x float> } @llvm.sincos.v3f32(<3 x float> %a)
+ ret { <...
[truncated]
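As the NOTE at the top of the file says, the CHECK lines are autogenerated by utils/update_llc_test_checks.py; if LoongArch codegen changes, they can be regenerated with something along these lines (the build directory path is illustrative):

  llvm/utils/update_llc_test_checks.py --llc-binary build/bin/llc llvm/test/CodeGen/LoongArch/llvm.sincos.ll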
heiher left a comment:
LGTM.

No description provided.