Commit d0df257

Release 0.5.10
* Updated build scripts.
* Reviewed generic abgr32_to_bgrff function implementation for Big endian platforms (a scalar sketch of this conversion follows below).
* Submitted x86/x86_64 SSE2-optimized function abgr32_to_bgrff.
* Submitted x86/x86_64 AVX2-optimized function abgr32_to_bgrff.
* Submitted ARMv7 NEON-optimized function abgr32_to_bgrff.
* Submitted AArch64 ASIMD-optimized function abgr32_to_bgrff.
* Fixed lacking export for SSE2-optimized functions support: fmmod3, fmrmod3, fmmod4, fmrmod4.
* Added support of AMD Zen 2 architecture for CPU tuning.
* More careful AVX2 support detection for AMD processors.
* Fixed improper RGBA to HSLA conversion in rgba_to_hsla routine.
2 parents 592c74b + 1d71eb9 commit d0df257
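
The abgr32_to_bgrff entries above describe a pixel-format conversion: each 32-bit ABGR pixel keeps its B, G and R bytes and receives an opaque 0xFF alpha in place of the original one. The snippet below is a rough little-endian scalar model of that per-pixel transform, inferred from the SIMD kernels added in this commit; it is not the library's generic routine, and the name abgr32_to_bgrff32_ref is made up for illustration.

    #include <cstdint>
    #include <cstddef>
    #include <cstring>

    // Hypothetical reference model (not the library's generic implementation):
    // on a little-endian machine an ABGR32 pixel (bytes A, B, G, R in memory)
    // loads as 0xRRGGBBAA, so shifting the word right by 8 bits and forcing the
    // top byte to 0xFF yields 0xFFRRGGBB, i.e. the bytes B, G, R, FF in memory.
    static void abgr32_to_bgrff32_ref(void *dst, const void *src, size_t count)
    {
        const uint8_t *s = static_cast<const uint8_t *>(src);
        uint8_t *d       = static_cast<uint8_t *>(dst);

        for (size_t i = 0; i < count; ++i, s += 4, d += 4)
        {
            uint32_t x;
            std::memcpy(&x, s, sizeof(x));      // x = 0xRRGGBBAA (little-endian load)
            x = (x >> 8) | 0xFF000000u;         // x = 0xFFRRGGBB
            std::memcpy(d, &x, sizeof(x));      // stores B, G, R, 0xFF
        }
    }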

31 files changed, +8228 -1091 lines changed

CHANGELOG

Lines changed: 12 additions & 0 deletions
@@ -2,6 +2,18 @@
 * RECENT CHANGES
 *******************************************************************************
 
+=== 0.5.10 ===
+* Updated build scripts.
+* Reviewed generic abgr32_to_bgrff function implementation for Big endian platforms.
+* Submitted x86/x86_64 SSE2-optimized function abgr32_to_bgrff.
+* Submitted x86/x86_64 AVX2-optimized function abgr32_to_bgrff.
+* Submitted ARMv7 NEON-optimized function abgr32_to_bgrff.
+* Submitted AArch64 ASIMD-optimized function abgr32_to_bgrff.
+* Fixed lacking export for SSE2-optimized functions support: fmmod3, fmrmod3, fmmod4, fmrmod4.
+* Added support of AMD Zen 2 architecture for CPU tuning.
+* More careful AVX2 support detection for AMD processors.
+* Fixed improper RGBA to HSLA conversion in rgba_to_hsla routine.
+
 === 0.5.9 ===
 * Updated build scripts that now use tags without prefixes first.

dependencies.mk

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@
 #
 
 # Variables that describe dependencies
-LSP_COMMON_LIB_VERSION := 1.0.8
+LSP_COMMON_LIB_VERSION := 1.0.10
 LSP_COMMON_LIB_NAME := lsp-common-lib
 LSP_COMMON_LIB_TYPE := src
 LSP_COMMON_LIB_URL := https://github.com/sadko4u/$(LSP_COMMON_LIB_NAME).git

include/lsp-plug.in/dsp/version.h

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@
 // Define version of headers
 #define LSP_DSP_LIB_MAJOR 0
 #define LSP_DSP_LIB_MINOR 5
-#define LSP_DSP_LIB_MICRO 9
+#define LSP_DSP_LIB_MICRO 10
 
 #ifdef LSP_DSP_LIB_BUILTIN
 #define LSP_DSP_LIB_CPPEXPORT

include/private/dsp/arch/aarch64/asimd/graphics.h

Lines changed: 31 additions & 0 deletions

@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
+ * (C) 2020 Vladimir Sadovnikov <[email protected]>
+ *
+ * This file is part of lsp-dsp-lib
+ * Created on: 11 Oct 2020
+ *
+ * lsp-dsp-lib is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * any later version.
+ *
+ * lsp-dsp-lib is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with lsp-dsp-lib. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#ifndef PRIVATE_DSP_ARCH_AARCH64_ASIMD_GRAPHICS_H_
+#define PRIVATE_DSP_ARCH_AARCH64_ASIMD_GRAPHICS_H_
+
+#ifndef PRIVATE_DSP_ARCH_AARCH64_ASIMD_IMPL
+    #error "This header should not be included directly"
+#endif /* PRIVATE_DSP_ARCH_AARCH64_ASIMD_IMPL */
+
+#include <private/dsp/arch/aarch64/asimd/graphics/pixelfmt.h>
+
+#endif /* PRIVATE_DSP_ARCH_AARCH64_ASIMD_GRAPHICS_H_ */

include/private/dsp/arch/aarch64/asimd/graphics/pixelfmt.h

Lines changed: 135 additions & 0 deletions

@@ -0,0 +1,135 @@
+/*
+ * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
+ * (C) 2020 Vladimir Sadovnikov <[email protected]>
+ *
+ * This file is part of lsp-dsp-lib
+ * Created on: 11 Oct 2020
+ *
+ * lsp-dsp-lib is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * any later version.
+ *
+ * lsp-dsp-lib is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with lsp-dsp-lib. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#ifndef PRIVATE_DSP_ARCH_AARCH64_ASIMD_GRAPHICS_PIXELFMT_H_
+#define PRIVATE_DSP_ARCH_AARCH64_ASIMD_GRAPHICS_PIXELFMT_H_
+
+#ifndef PRIVATE_DSP_ARCH_AARCH64_ASIMD_IMPL
+    #error "This header should not be included directly"
+#endif /* PRIVATE_DSP_ARCH_AARCH64_ASIMD_IMPL */
+
+namespace lsp
+{
+    namespace asimd
+    {
+        IF_ARCH_AARCH64(
+            static const uint32_t abgr32_to_bgrff32_const[] __lsp_aligned32 =
+            {
+                LSP_DSP_VEC8(0xff000000)
+            };
+        );
+
+        void abgr32_to_bgrff32(void *dst, const void *src, size_t count)
+        {
+            ARCH_AARCH64_ASM
+            (
+                __ASM_EMIT("ldp q16, q17, [%[MASK]]")
+                __ASM_EMIT("subs %[count], %[count], #32")
+                __ASM_EMIT("ldp q18, q19, [%[MASK]]")
+                __ASM_EMIT("b.lo 2f")
+                // 32x blocks
+                __ASM_EMIT("ldp q20, q21, [%[MASK]]")
+                __ASM_EMIT("ldp q22, q23, [%[MASK]]")
+                __ASM_EMIT("1:")
+                __ASM_EMIT("ldp q0, q1, [%[src], #0x00]")
+                __ASM_EMIT("ldp q2, q3, [%[src], #0x20]")
+                __ASM_EMIT("ldp q4, q5, [%[src], #0x40]")
+                __ASM_EMIT("ldp q6, q7, [%[src], #0x60]")
+                __ASM_EMIT("sri v16.4s, v0.4s, #8")
+                __ASM_EMIT("sri v17.4s, v1.4s, #8")
+                __ASM_EMIT("sri v18.4s, v2.4s, #8")
+                __ASM_EMIT("sri v19.4s, v3.4s, #8")
+                __ASM_EMIT("sri v20.4s, v4.4s, #8")
+                __ASM_EMIT("sri v21.4s, v5.4s, #8")
+                __ASM_EMIT("sri v22.4s, v6.4s, #8")
+                __ASM_EMIT("sri v23.4s, v7.4s, #8")
+                __ASM_EMIT("stp q16, q17, [%[dst], #0x00]")
+                __ASM_EMIT("stp q18, q19, [%[dst], #0x20]")
+                __ASM_EMIT("stp q20, q21, [%[dst], #0x40]")
+                __ASM_EMIT("stp q22, q23, [%[dst], #0x60]")
+                __ASM_EMIT("add %[src], %[src], #0x80")
+                __ASM_EMIT("subs %[count], %[count], #32")
+                __ASM_EMIT("add %[dst], %[dst], #0x80")
+                __ASM_EMIT("b.hs 1b")
+                // 16x block
+                __ASM_EMIT("2:")
+                __ASM_EMIT("adds %[count], %[count], #16")
+                __ASM_EMIT("b.lt 4f")
+                __ASM_EMIT("ldp q0, q1, [%[src], #0x00]")
+                __ASM_EMIT("ldp q2, q3, [%[src], #0x20]")
+                __ASM_EMIT("sri v16.4s, v0.4s, #8")
+                __ASM_EMIT("sri v17.4s, v1.4s, #8")
+                __ASM_EMIT("sri v18.4s, v2.4s, #8")
+                __ASM_EMIT("sri v19.4s, v3.4s, #8")
+                __ASM_EMIT("stp q16, q17, [%[dst], #0x00]")
+                __ASM_EMIT("stp q18, q19, [%[dst], #0x20]")
+                __ASM_EMIT("add %[src], %[src], #0x40")
+                __ASM_EMIT("sub %[count], %[count], #16")
+                __ASM_EMIT("add %[dst], %[dst], #0x40")
+                // 8x block
+                __ASM_EMIT("4:")
+                __ASM_EMIT("adds %[count], %[count], #8")
+                __ASM_EMIT("b.lt 6f")
+                __ASM_EMIT("ldp q0, q1, [%[src], #0x00]")
+                __ASM_EMIT("sri v16.4s, v0.4s, #8")
+                __ASM_EMIT("sri v17.4s, v1.4s, #8")
+                __ASM_EMIT("stp q16, q17, [%[dst], #0x00]")
+                __ASM_EMIT("add %[src], %[src], #0x20")
+                __ASM_EMIT("add %[dst], %[dst], #0x20")
+                __ASM_EMIT("sub %[count], %[count], #8")
+                // 4x block
+                __ASM_EMIT("6:")
+                __ASM_EMIT("adds %[count], %[count], #4")
+                __ASM_EMIT("b.lt 8f")
+                __ASM_EMIT("ldr q0, [%[src], #0x00]")
+                __ASM_EMIT("sri v16.4s, v0.4s, #8")
+                __ASM_EMIT("str q16, [%[dst], #0x00]")
+                __ASM_EMIT("add %[src], %[src], #0x10")
+                __ASM_EMIT("add %[dst], %[dst], #0x10")
+                __ASM_EMIT("sub %[count], %[count], #4")
+                // 1x blocks
+                __ASM_EMIT("8:")
+                __ASM_EMIT("adds %[count], %[count], #3")
+                __ASM_EMIT("b.lt 10f")
+                __ASM_EMIT("9:")
+                __ASM_EMIT("ld1r {v0.4s}, [%[src]]")
+                __ASM_EMIT("sri v16.4s, v0.4s, #8")
+                __ASM_EMIT("subs %[count], %[count], #1")
+                __ASM_EMIT("st1 {v16.s}[0], [%[dst]]")
+                __ASM_EMIT("add %[src], %[src], #0x04")
+                __ASM_EMIT("add %[dst], %[dst], #0x04")
+                __ASM_EMIT("b.ge 9b")
+                __ASM_EMIT("10:")
+
+                : [dst] "+r" (dst), [src] "+r" (src),
+                  [count] "+r" (count)
+                : [MASK] "r" (&abgr32_to_bgrff32_const[0])
+                : "cc", "memory",
+                  "q0", "q1", "q2", "q3",
+                  "q4", "q5", "q6", "q7",
+                  "q16", "q17", "q18", "q19",
+                  "q20", "q21", "q22", "q23"
+            );
+        }
+    }
+}
+
+#endif /* PRIVATE_DSP_ARCH_AARCH64_ASIMD_GRAPHICS_PIXELFMT_H_ */
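
A note on how the kernel above works: q16-q23 are pre-loaded with the 0xff000000 mask and then double as result registers. SRI (shift right and insert) keeps the top 8 bits of the destination lane, so every result carries a 0xFF top byte and the same registers remain valid masks for the next iteration, which is why they are never reloaded inside the loop. A rough per-lane C++ model of the operation (the helper name sri32 is made up for illustration):

    #include <cstdint>

    // Per-lane model of "sri vd.4s, vn.4s, #8": shift the source lane right by 8
    // bits and insert it below the top 8 bits of the destination, which are kept.
    static inline uint32_t sri32(uint32_t vd, uint32_t vn, unsigned shift = 8)
    {
        const uint32_t keep = ~(~0u >> shift);      // 0xFF000000 for shift == 8
        return (vd & keep) | (vn >> shift);
    }

    // With vd pre-loaded as 0xFF000000 this is exactly (vn >> 8) | 0xFF000000,
    // and the result's top byte is again 0xFF, so the same register can be
    // reused as the mask on the next iteration without reloading it.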

include/private/dsp/arch/arm/neon-d32/graphics.h

Lines changed: 9 additions & 121 deletions
@@ -26,6 +26,8 @@
     #error "This header should not be included directly"
 #endif /* PRIVATE_DSP_ARCH_ARM_NEON_D32_IMPL */
 
+#include <private/dsp/arch/arm/neon-d32/graphics/pixelfmt.h>
+
 namespace lsp
 {
     namespace neon_d32
@@ -966,120 +968,6 @@ namespace lsp
             );
         }
 
-        void rgba32_to_bgra32(void *dst, const void *src, size_t count)
-        {
-            IF_ARCH_ARM(
-                uint32_t mask;
-                uint32_t t1, t2;
-            );
-
-            ARCH_ARM_ASM(
-                // 64x blocks
-                __ASM_EMIT("subs %[count], $64")
-                __ASM_EMIT("blo 2f")
-                __ASM_EMIT("1:")
-                __ASM_EMIT("vld4.8 {q0-q1}, [%[src]]!")     // d0 = R, d1 = G, d2 = B, d3 = A
-                __ASM_EMIT("vld4.8 {q2-q3}, [%[src]]!")
-                __ASM_EMIT("vswp d0, d2")
-                __ASM_EMIT("vld4.8 {q4-q5}, [%[src]]!")
-                __ASM_EMIT("vst4.8 {q0-q1}, [%[dst]]!")
-                __ASM_EMIT("vswp d4, d6")
-                __ASM_EMIT("vld4.8 {q6-q7}, [%[src]]!")
-                __ASM_EMIT("vst4.8 {q2-q3}, [%[dst]]!")
-                __ASM_EMIT("vswp d8, d10")
-                __ASM_EMIT("vld4.8 {q8-q9}, [%[src]]!")
-                __ASM_EMIT("vst4.8 {q4-q5}, [%[dst]]!")
-                __ASM_EMIT("vswp d12, d14")
-                __ASM_EMIT("vld4.8 {q10-q11}, [%[src]]!")
-                __ASM_EMIT("vst4.8 {q6-q7}, [%[dst]]!")
-                __ASM_EMIT("vswp d16, d18")
-                __ASM_EMIT("vld4.8 {q12-q13}, [%[src]]!")
-                __ASM_EMIT("vst4.8 {q8-q9}, [%[dst]]!")
-                __ASM_EMIT("vswp d20, d22")
-                __ASM_EMIT("vld4.8 {q14-q15}, [%[src]]!")
-                __ASM_EMIT("vst4.8 {q10-q11}, [%[dst]]!")
-                __ASM_EMIT("vswp d24, d26")
-                __ASM_EMIT("vswp d28, d30")
-                __ASM_EMIT("vst4.8 {q12-q13}, [%[dst]]!")
-                __ASM_EMIT("vst4.8 {q14-q15}, [%[dst]]!")
-                __ASM_EMIT("subs %[count], $64")
-                __ASM_EMIT("bhs 1b")
-
-                // 32x blocks
-                __ASM_EMIT("2:")
-                __ASM_EMIT("adds %[count], $32")
-                __ASM_EMIT("blt 4f")
-                __ASM_EMIT("vld4.8 {q0-q1}, [%[src]]!")     // d0 = R, d1 = G, d2 = B, d3 = A
-                __ASM_EMIT("vld4.8 {q2-q3}, [%[src]]!")
-                __ASM_EMIT("vswp d0, d2")
-                __ASM_EMIT("vld4.8 {q4-q5}, [%[src]]!")
-                __ASM_EMIT("vst4.8 {q0-q1}, [%[dst]]!")
-                __ASM_EMIT("vswp d4, d6")
-                __ASM_EMIT("vld4.8 {q6-q7}, [%[src]]!")
-                __ASM_EMIT("vst4.8 {q2-q3}, [%[dst]]!")
-                __ASM_EMIT("vswp d8, d10")
-                __ASM_EMIT("vswp d12, d14")
-                __ASM_EMIT("vst4.8 {q4-q5}, [%[dst]]!")
-                __ASM_EMIT("vst4.8 {q6-q7}, [%[dst]]!")
-                __ASM_EMIT("sub %[count], $32")
-
-                // 16x blocks
-                __ASM_EMIT("4:")
-                __ASM_EMIT("adds %[count], $16")
-                __ASM_EMIT("blt 6f")
-                __ASM_EMIT("vld4.8 {q0-q1}, [%[src]]!")     // d0 = R, d1 = G, d2 = B, d3 = A
-                __ASM_EMIT("vld4.8 {q2-q3}, [%[src]]!")
-                __ASM_EMIT("vswp d0, d2")
-                __ASM_EMIT("vswp d4, d6")
-                __ASM_EMIT("vst4.8 {q0-q1}, [%[dst]]!")
-                __ASM_EMIT("vst4.8 {q2-q3}, [%[dst]]!")
-                __ASM_EMIT("sub %[count], $16")
-
-                // 8x blocks
-                __ASM_EMIT("6:")
-                __ASM_EMIT("adds %[count], $8")
-                __ASM_EMIT("blt 8f")
-                __ASM_EMIT("vld4.8 {q0-q1}, [%[src]]!")     // d0 = R, d1 = G, d2 = B, d3 = A
-                __ASM_EMIT("vswp d0, d2")
-                __ASM_EMIT("vst4.8 {q0-q1}, [%[dst]]!")
-                __ASM_EMIT("sub %[count], $8")
-
-                // 4x blocks
-                __ASM_EMIT("8:")
-                __ASM_EMIT("adds %[count], $4")
-                __ASM_EMIT("blt 10f")
-                __ASM_EMIT("vld2.8 {q0}, [%[src]]!")        // d0 = RB, d1 = GA
-                __ASM_EMIT("vshl.i16 d2, d0, $8")
-                __ASM_EMIT("vshr.u16 d0, d0, $8")
-                __ASM_EMIT("vorr d0, d2")
-                __ASM_EMIT("vst2.8 {q0}, [%[dst]]!")
-                __ASM_EMIT("sub %[count], $4")
-
-                // 1x blocks
-                __ASM_EMIT("10:")
-                __ASM_EMIT("adds %[count], $3")
-                __ASM_EMIT("blt 12f")
-                __ASM_EMIT("mov %[mask], $0xff")
-                __ASM_EMIT("orr %[mask], $0xff0000")            // mask = ff 00 ff 00
-                __ASM_EMIT("11:")
-                __ASM_EMIT("ldr %[t1], [%[src]], $4")           // t1 = R G B A
-                __ASM_EMIT("and %[t2], %[t1], %[mask]")         // t2 = R 0 B 0
-                __ASM_EMIT("and %[t1], %[t1], %[mask], lsl $8") // t1 = 0 G 0 A
-                __ASM_EMIT("orr %[t1], %[t1], %[t2], ror $16")  // t1 = B G R A
-                __ASM_EMIT("str %[t1], [%[dst]], $4")
-                __ASM_EMIT("subs %[count], $1")
-                __ASM_EMIT("bge 11b")
-
-                __ASM_EMIT("12:")
-                : [src] "+r" (src), [dst] "+r" (dst), [count] "+r" (count),
-                  [t1] "=&r" (t1), [t2] "=&r" (t2), [mask] "=&r" (mask)
-                :
-                : "cc", "memory",
-                  "q0", "q1", "q2", "q3" , "q4", "q5", "q6", "q7",
-                  "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-            );
-        }
-
         #define FILL4_CORE \
             __ASM_EMIT("vld1.32 {q0}, [%[c4]]")     /* q0 = c0 c1 c2 c3 */ \
             __ASM_EMIT("vmov q1, q0")               /* q1 = c0 c1 c2 c3 */ \
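
The routine removed here swaps the R and B bytes of every 32-bit pixel and leaves G and A untouched, as the scalar tail of the deleted assembly spells out (mask 0x00ff00ff, rotate by 16). With the new graphics/pixelfmt.h include added above, the pixel-format conversions presumably now live in that dedicated header rather than being dropped. A minimal little-endian scalar sketch of the same swap, under a made-up name:

    #include <cstdint>
    #include <cstddef>
    #include <cstring>

    // Hypothetical scalar reference for the R<->B swap (little-endian words):
    // an RGBA32 pixel (bytes R, G, B, A in memory) loads as 0xAABBGGRR;
    // keep A and G, exchange R and B.
    static void rgba32_to_bgra32_ref(void *dst, const void *src, size_t count)
    {
        const uint8_t *s = static_cast<const uint8_t *>(src);
        uint8_t *d       = static_cast<uint8_t *>(dst);

        for (size_t i = 0; i < count; ++i, s += 4, d += 4)
        {
            uint32_t x;
            std::memcpy(&x, s, sizeof(x));          // x = 0xAABBGGRR
            x = (x & 0xFF00FF00u)                   // keep A and G in place
              | ((x >> 16) & 0x000000FFu)           // move B down into the R slot
              | ((x & 0x000000FFu) << 16);          // move R up into the B slot
            std::memcpy(d, &x, sizeof(x));          // stores B, G, R, A
        }
    }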
@@ -1465,13 +1353,13 @@
             __ASM_EMIT("vmul.f32 q4, q6, q9")       /* q4 = D/X */ \
             __ASM_EMIT("veor q5, q5")               /* q5 = 0 */ \
             \
-            __ASM_EMIT("vclt.f32 q6, q1, q13")      /* q6 = [L < 1] */ \
+            __ASM_EMIT("vcle.f32 q6, q1, q14")      /* q6 = [L <= 0.5] */ \
             __ASM_EMIT("vceq.f32 q7, q1, q5")       /* q7 = [L == 0] */ \
-            __ASM_EMIT("vcgt.f32 q8, q1, q13")      /* q8 = [L > 1] */ \
-            __ASM_EMIT("vbit q6, q5, q7")           /* q6 = [L < 1] & [L != 0] */ \
-            __ASM_EMIT("vand q8, q8, q4")           /* q8 = D/X & [L > 1] */ \
-            __ASM_EMIT("vand q6, q6, q2")           /* q6 = D/L & [L < 1] & [L != 0] */ \
-            __ASM_EMIT("vorr q2, q8, q6")           /* q2 = S = (D/L & [L < 1] & [L != 0]) | (D/X & [L > 1]) */ \
+            __ASM_EMIT("vcgt.f32 q8, q1, q14")      /* q8 = [L > 0.5] */ \
+            __ASM_EMIT("vbit q6, q5, q7")           /* q6 = [L <= 0.5] & [L != 0] */ \
+            __ASM_EMIT("vand q8, q8, q4")           /* q8 = D/X & [L > 0.5] */ \
+            __ASM_EMIT("vand q6, q6, q2")           /* q6 = D/L & [L <= 0.5] & [L != 0] */ \
+            __ASM_EMIT("vorr q2, q8, q6")           /* q2 = S = (D/L & [L <= 0.5] & [L != 0]) | (D/X & [L > 0.5]) */ \
             __ASM_EMIT("vmul.f32 q0, q0, q15")      /* q0 = H * 1/6 */ \
             __ASM_EMIT("vmul.f32 q2, q2, q14")      /* q2 = S * 1/2 */ \
@@ -1497,7 +1385,7 @@
                 H = (R - G) / d + 4.0f;
 
             // Calculate saturation
-            if (L < 1.0f)
+            if (L < 0.5f)
                 S = (L != 0.0f) ? d / L : 0.0f;
             else
                 S = (L != 1.0f) ? d / (1.0f - L) : 0.0f;
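
Both hunks above apply the same correction: the saturation branch must test lightness against 0.5, not 1.0. For RGB to HSL with d = max - min and L = (max + min) / 2, saturation is d / (2L) when L <= 0.5 and d / (2 - 2L) otherwise; the trailing factor of 1/2 is what the "S * 1/2" multiply in the macro provides. A compact scalar sketch of the corrected step, with a made-up helper name and assuming channels normalized to [0, 1]:

    #include <algorithm>

    // Hypothetical scalar sketch of the corrected saturation step; not the
    // library's routine. r, g, b are assumed to lie in [0, 1].
    static float hsl_saturation(float r, float g, float b)
    {
        const float M = std::max(r, std::max(g, b));
        const float m = std::min(r, std::min(g, b));
        const float d = M - m;              // chroma
        const float L = 0.5f * (M + m);     // lightness

        if (d == 0.0f)                      // grey pixel: saturation is zero
            return 0.0f;

        // The fix in this commit: branch on L <= 0.5 instead of L < 1.
        return (L <= 0.5f) ? d / (2.0f * L)
                           : d / (2.0f - 2.0f * L);
    }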
