Skip to content

Commit 2afc956

Browse files
committed
Release 1.0.7
* Implemented axis_apply_log1 and axis_apply_log2 optimized for AArch64 ASIMD.
* Implemented fill_rgba and fill_hsla for AArch64 ASIMD.
* Implemented rgba_to_hsla, hsla_to_rgba, rgba_to_bgra32, rgba32_to_bgra32 for AArch64 ASIMD.
* Implemented eff_hsla_hue, eff_hsla_sat, eff_hsla_light, eff_hsla_alpha for AArch64 ASIMD.
2 parents a364278 + 9b29544 commit 2afc956

File tree

31 files changed

+6359
-3652
lines changed

31 files changed

+6359
-3652
lines changed

CHANGELOG

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@
22
* RECENT CHANGES
33
*******************************************************************************
44

5+
=== 1.0.7 ===
6+
* Implemented axis_apply_log1 and axis_apply_log2 optimized for AArch64 ASIMD.
7+
* Implemented fill_rgba and fill_hsla for AArch64 ASIMD.
8+
* Implemented rgba_to_hsla, hsla_to_rgba, rgba_to_bgra32, rgba32_to_bgra32 for AArch64 ASIMD.
9+
* Implemented eff_hsla_hue, eff_hsla_sat, eff_hsla_light, eff_hsla_alpha for AArch64 ASIMD.
10+
511
=== 1.0.6 ===
612
* Updated build scripts.
713

include/lsp-plug.in/dsp/version.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
// Define version of headers
2626
#define LSP_DSP_LIB_MAJOR 1
2727
#define LSP_DSP_LIB_MINOR 0
28-
#define LSP_DSP_LIB_MICRO 6
28+
#define LSP_DSP_LIB_MICRO 7
2929

3030
#if defined(__WINDOWS__) || defined(__WIN32__) || defined(__WIN64__) || defined(_WIN64) || defined(_WIN32) || defined(__WINNT) || defined(__WINNT__)
3131
#define LSP_DSP_LIB_EXPORT_MODIFIER __declspec(dllexport)

include/private/dsp/arch/aarch64/asimd/filters/dynamic.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
#error "This header should not be included directly"
2727
#endif /* PRIVATE_DSP_ARCH_AARCH64_ASIMD_IMPL */
2828

29+
#include <private/dsp/arch/aarch64/asimd/filters/static.h>
30+
2931
namespace lsp
3032
{
3133
namespace asimd

include/private/dsp/arch/aarch64/asimd/graphics.h

Lines changed: 0 additions & 31 deletions
This file was deleted.

include/private/dsp/arch/aarch64/asimd/graphics/axis.h

Lines changed: 963 additions & 0 deletions
Large diffs are not rendered by default.

include/private/dsp/arch/aarch64/asimd/graphics/colors.h

Lines changed: 724 additions & 0 deletions
Large diffs are not rendered by default.

include/private/dsp/arch/aarch64/asimd/graphics/effects.h

Lines changed: 545 additions & 0 deletions
Large diffs are not rendered by default.

include/private/dsp/arch/aarch64/asimd/graphics/pixelfmt.h

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,126 @@ namespace lsp
3030
{
3131
namespace asimd
3232
{
33+
// Mask table for rgba32_to_bgra32(): that routine loads it as a pair of 128-bit
// vectors with "ldp q16, q17, [%[XC]]" and uses them as the mask operand of its
// `bit` instructions. The set bits of 0x00ff00ff (bits 0-7 and 16-23 of each
// 32-bit word) mark the bits that are taken from the half-word-swapped copy of
// the pixel; the remaining bits keep their original value.
// NOTE(review): assumes LSP_DSP_VEC4(x) expands to four copies of x, so that each
// row fills exactly one 128-bit vector - confirm against the macro definition.
IF_ARCH_AARCH64(
34+
static const uint32_t rgba32_to_bgra32_const[] __lsp_aligned16 =
35+
{
36+
LSP_DSP_VEC4(0x00ff00ff),
37+
LSP_DSP_VEC4(0x00ff00ff),
38+
};
39+
);
40+
41+
/**
 * Convert an array of 32-bit pixels by exchanging bits 0-7 with bits 16-23 of
 * every 32-bit word while keeping bits 8-15 and 24-31 in place. The inline
 * register comments label this as R G B A -> B G R A.
 *
 * Pixels are processed in blocks of 32, then at most one block each of 16, 8
 * and 4, and finally one pixel at a time. Both pointers advance by 4 bytes per
 * processed pixel.
 *
 * @param dst destination buffer, receives count 4-byte pixels
 * @param src source buffer, count 4-byte pixels are read from it
 * @param count number of 4-byte pixels to convert
 */
void rgba32_to_bgra32(void *dst, const void *src, size_t count)
42+
{
43+
ARCH_AARCH64_ASM(
44+
// Load the two mask vectors (rgba32_to_bgra32_const) into v16 and v17
__ASM_EMIT("ldp q16, q17, [%[XC]]")
45+
46+
// 32x blocks
// count is pre-decremented by 32; b.lo (unsigned borrow) skips the loop when
// fewer than 32 pixels are available
47+
__ASM_EMIT("subs %[count], %[count], #32")
48+
__ASM_EMIT("b.lo 2f")
49+
__ASM_EMIT("1:")
// Load 8 vectors = 32 pixels (0x80 bytes)
50+
__ASM_EMIT("ldp q0, q1, [%[src], 0x00]") // v0 = R G B A
51+
__ASM_EMIT("ldp q2, q3, [%[src], 0x20]")
52+
__ASM_EMIT("ldp q4, q5, [%[src], 0x40]")
53+
__ASM_EMIT("ldp q6, q7, [%[src], 0x60]")
// rev32 on 16-bit lanes swaps the two half-words inside every 32-bit pixel
54+
__ASM_EMIT("rev32 v8.8h, v0.8h") // v8 = B A R G
55+
__ASM_EMIT("rev32 v9.8h, v1.8h")
56+
__ASM_EMIT("rev32 v10.8h, v2.8h")
57+
__ASM_EMIT("rev32 v11.8h, v3.8h")
58+
__ASM_EMIT("rev32 v12.8h, v4.8h")
59+
__ASM_EMIT("rev32 v13.8h, v5.8h")
60+
__ASM_EMIT("rev32 v14.8h, v6.8h")
61+
__ASM_EMIT("rev32 v15.8h, v7.8h")
// bit copies only the masked bits (0x00ff00ff) from the swapped copy into the
// original vector, so the unmasked bytes stay where they were
62+
__ASM_EMIT("bit v0.16b, v8.16b, v16.16b") // v0 = B G R A
63+
__ASM_EMIT("bit v1.16b, v9.16b, v17.16b")
64+
__ASM_EMIT("bit v2.16b, v10.16b, v16.16b")
65+
__ASM_EMIT("bit v3.16b, v11.16b, v17.16b")
66+
__ASM_EMIT("bit v4.16b, v12.16b, v16.16b")
67+
__ASM_EMIT("bit v5.16b, v13.16b, v17.16b")
68+
__ASM_EMIT("bit v6.16b, v14.16b, v16.16b")
69+
__ASM_EMIT("bit v7.16b, v15.16b, v17.16b")
70+
__ASM_EMIT("stp q0, q1, [%[dst], 0x00]")
71+
__ASM_EMIT("stp q2, q3, [%[dst], 0x20]")
72+
__ASM_EMIT("stp q4, q5, [%[dst], 0x40]")
73+
__ASM_EMIT("stp q6, q7, [%[dst], 0x60]")
// Flags are set here; the two adds below do not modify them, so b.hs still
// tests the result of this subtraction
74+
__ASM_EMIT("subs %[count], %[count], #32")
75+
__ASM_EMIT("add %[src], %[src], 0x80")
76+
__ASM_EMIT("add %[dst], %[dst], 0x80")
77+
__ASM_EMIT("b.hs 1b")
78+
79+
// 16x blocks
// On entry count = remaining - 32; adding 16 gives remaining - 16, which is
// negative when fewer than 16 pixels are left
80+
__ASM_EMIT("2:")
81+
__ASM_EMIT("adds %[count], %[count], #16")
82+
__ASM_EMIT("b.lt 4f")
83+
__ASM_EMIT("ldp q0, q1, [%[src], 0x00]") // v0 = R G B A
84+
__ASM_EMIT("ldp q2, q3, [%[src], 0x20]")
85+
__ASM_EMIT("rev32 v8.8h, v0.8h") // v8 = B A R G
86+
__ASM_EMIT("rev32 v9.8h, v1.8h")
87+
__ASM_EMIT("rev32 v10.8h, v2.8h")
88+
__ASM_EMIT("rev32 v11.8h, v3.8h")
89+
__ASM_EMIT("bit v0.16b, v8.16b, v16.16b") // v0 = B G R A
90+
__ASM_EMIT("bit v1.16b, v9.16b, v17.16b")
91+
__ASM_EMIT("bit v2.16b, v10.16b, v16.16b")
92+
__ASM_EMIT("bit v3.16b, v11.16b, v17.16b")
93+
__ASM_EMIT("stp q0, q1, [%[dst], 0x00]")
94+
__ASM_EMIT("stp q2, q3, [%[dst], 0x20]")
// Re-bias so that label 4 sees count = remaining - 16 on both the taken and
// the skipped path
95+
__ASM_EMIT("sub %[count], %[count], #16")
96+
__ASM_EMIT("add %[src], %[src], 0x40")
97+
__ASM_EMIT("add %[dst], %[dst], 0x40")
98+
99+
// 8x blocks
// On entry count = remaining - 16; adding 8 gives remaining - 8
100+
__ASM_EMIT("4:")
101+
__ASM_EMIT("adds %[count], %[count], #8")
102+
__ASM_EMIT("b.lt 6f")
103+
__ASM_EMIT("ldp q0, q1, [%[src], 0x00]") // v0 = R G B A
104+
__ASM_EMIT("rev32 v8.8h, v0.8h") // v8 = B A R G
105+
__ASM_EMIT("rev32 v9.8h, v1.8h")
106+
__ASM_EMIT("bit v0.16b, v8.16b, v16.16b") // v0 = B G R A
107+
__ASM_EMIT("bit v1.16b, v9.16b, v17.16b")
108+
__ASM_EMIT("stp q0, q1, [%[dst], 0x00]")
// Re-bias so that label 6 sees count = remaining - 8 on both paths
109+
__ASM_EMIT("sub %[count], %[count], #8")
110+
__ASM_EMIT("add %[src], %[src], 0x20")
111+
__ASM_EMIT("add %[dst], %[dst], 0x20")
112+
113+
// 4x blocks
// On entry count = remaining - 8; adding 4 gives remaining - 4
114+
__ASM_EMIT("6:")
115+
__ASM_EMIT("adds %[count], %[count], #4")
116+
__ASM_EMIT("b.lt 8f")
117+
__ASM_EMIT("ldr q0, [%[src], 0x00]") // v0 = R G B A
118+
__ASM_EMIT("rev32 v8.8h, v0.8h") // v8 = B A R G
119+
__ASM_EMIT("bit v0.16b, v8.16b, v16.16b") // v0 = B G R A
120+
__ASM_EMIT("str q0, [%[dst], 0x00]")
// Re-bias so that label 8 sees count = remaining - 4 on both paths
121+
__ASM_EMIT("sub %[count], %[count], #4")
122+
__ASM_EMIT("add %[src], %[src], 0x10")
123+
__ASM_EMIT("add %[dst], %[dst], 0x10")
124+
125+
// 1x blocks
// On entry count = remaining - 4; adding 3 gives remaining - 1, which is
// negative when no pixels are left
126+
__ASM_EMIT("8:")
127+
__ASM_EMIT("adds %[count], %[count], #3")
128+
__ASM_EMIT("b.lt 10f")
129+
__ASM_EMIT("9:")
// ld1r reads a single 32-bit pixel and replicates it to all four lanes;
// only lane 0 is written back by st1 below
130+
__ASM_EMIT("ld1r {v0.4s}, [%[src]]") // v0 = R G B A
131+
__ASM_EMIT("rev32 v8.8h, v0.8h") // v8 = B A R G
132+
__ASM_EMIT("bit v0.16b, v8.16b, v16.16b") // v0 = B G R A
133+
__ASM_EMIT("st1 {v0.s}[0], [%[dst]]")
134+
__ASM_EMIT("add %[src], %[src], 0x04")
135+
__ASM_EMIT("add %[dst], %[dst], 0x04")
136+
__ASM_EMIT("subs %[count], %[count], #1")
137+
__ASM_EMIT("b.ge 9b")
138+
139+
// End
140+
__ASM_EMIT("10:")
// src, dst and count are read-write operands: the asm advances the pointers
// and consumes the counter
141+
: [src] "+r" (src), [dst] "+r" (dst),
142+
[count] "+r" (count)
// XC is the address of the mask table
143+
: [XC] "r" (&rgba32_to_bgra32_const[0])
// Condition flags, memory and every vector register touched above (v0-v17)
// are declared as clobbered
144+
: "cc", "memory",
145+
"v0", "v1", "v2", "v3",
146+
"v4", "v5", "v6", "v7",
147+
"v8", "v9", "v10", "v11",
148+
"v12", "v13", "v14", "v15",
149+
"v16", "v17"
150+
);
151+
}
152+
33153
IF_ARCH_AARCH64(
34154
static const uint32_t abgr32_to_bgrff32_const[] __lsp_aligned32 =
35155
{

include/private/dsp/arch/arm/neon-d32/fastconv.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#error "This header should not be included directly"
2727
#endif /* PRIVATE_DSP_ARCH_ARM_NEON_D32_IMPL */
2828

29+
#include <private/dsp/arch/arm/neon-d32/fft/const.h>
2930
#include <private/dsp/arch/arm/neon-d32/fastconv/parse.h>
3031
#include <private/dsp/arch/arm/neon-d32/fastconv/restore.h>
3132
#include <private/dsp/arch/arm/neon-d32/fastconv/apply.h>

0 commit comments

Comments
 (0)