Commit d0df257

Release 0.5.10
* Updated build scripts.
* Reviewed generic abgr32_to_bgrff function implementation for Big endian platforms (a scalar sketch of this conversion follows below).
* Submitted x86/x86_64 SSE2-optimized function abgr32_to_bgrff.
* Submitted x86/x86_64 AVX2-optimized function abgr32_to_bgrff.
* Submitted ARMv7 NEON-optimized function abgr32_to_bgrff.
* Submitted AArch64 ASIMD-optimized function abgr32_to_bgrff.
* Fixed lacking export for SSE2-optimized functions support: fmmod3, fmrmod3, fmmod4, fmrmod4.
* Added support of AMD Zen 2 architecture for CPU tuning.
* More careful AVX2 support detection for AMD processors.
* Fixed improper RGBA to HSLA conversion in rgba_to_hsla routine.
2 parents 592c74b + 1d71eb9 commit d0df257
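
The abgr32_to_bgrff entries above describe a pixel-format conversion: each 32-bit ABGR pixel keeps its B, G and R bytes and receives an opaque 0xFF alpha in place of the original one. The snippet below is a rough little-endian scalar model of that per-pixel transform, inferred from the SIMD kernels added in this commit; it is not the library's generic routine, and the name abgr32_to_bgrff32_ref is made up for illustration.

    #include <cstdint>
    #include <cstddef>
    #include <cstring>

    // Hypothetical reference model (not the library's generic implementation):
    // on a little-endian machine an ABGR32 pixel (bytes A, B, G, R in memory)
    // loads as 0xRRGGBBAA, so shifting the word right by 8 bits and forcing the
    // top byte to 0xFF yields 0xFFRRGGBB, i.e. the bytes B, G, R, FF in memory.
    static void abgr32_to_bgrff32_ref(void *dst, const void *src, size_t count)
    {
        const uint8_t *s = static_cast<const uint8_t *>(src);
        uint8_t *d       = static_cast<uint8_t *>(dst);

        for (size_t i = 0; i < count; ++i, s += 4, d += 4)
        {
            uint32_t x;
            std::memcpy(&x, s, sizeof(x));      // x = 0xRRGGBBAA (little-endian load)
            x = (x >> 8) | 0xFF000000u;         // x = 0xFFRRGGBB
            std::memcpy(d, &x, sizeof(x));      // stores B, G, R, 0xFF
        }
    }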

31 files changed, +8228 -1091 lines changed

CHANGELOG

Lines changed: 12 additions & 0 deletions
@@ -2,6 +2,18 @@
 * RECENT CHANGES
 *******************************************************************************
 
+=== 0.5.10 ===
+* Updated build scripts.
+* Reviewed generic abgr32_to_bgrff function implementation for Big endian platforms.
+* Submitted x86/x86_64 SSE2-optimized function abgr32_to_bgrff.
+* Submitted x86/x86_64 AVX2-optimized function abgr32_to_bgrff.
+* Submitted ARMv7 NEON-optimized function abgr32_to_bgrff.
+* Submitted AArch64 ASIMD-optimized function abgr32_to_bgrff.
+* Fixed lacking export for SSE2-optimized functions support: fmmod3, fmrmod3, fmmod4, fmrmod4.
+* Added support of AMD Zen 2 architecture for CPU tuning.
+* More careful AVX2 support detection for AMD processors.
+* Fixed improper RGBA to HSLA conversion in rgba_to_hsla routine.
+
 === 0.5.9 ===
 * Updated build scripts that now use tags without prefixes first.

dependencies.mk

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@
 #
 
 # Variables that describe dependencies
-LSP_COMMON_LIB_VERSION := 1.0.8
+LSP_COMMON_LIB_VERSION := 1.0.10
 LSP_COMMON_LIB_NAME := lsp-common-lib
 LSP_COMMON_LIB_TYPE := src
 LSP_COMMON_LIB_URL := https://github.com/sadko4u/$(LSP_COMMON_LIB_NAME).git

include/lsp-plug.in/dsp/version.h

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@
 // Define version of headers
 #define LSP_DSP_LIB_MAJOR 0
 #define LSP_DSP_LIB_MINOR 5
-#define LSP_DSP_LIB_MICRO 9
+#define LSP_DSP_LIB_MICRO 10
 
 #ifdef LSP_DSP_LIB_BUILTIN
 #define LSP_DSP_LIB_CPPEXPORT

include/private/dsp/arch/aarch64/asimd/graphics.h

Lines changed: 31 additions & 0 deletions

@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
+ * (C) 2020 Vladimir Sadovnikov <[email protected]>
+ *
+ * This file is part of lsp-dsp-lib
+ * Created on: 11 Oct 2020
+ *
+ * lsp-dsp-lib is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * any later version.
+ *
+ * lsp-dsp-lib is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with lsp-dsp-lib. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#ifndef PRIVATE_DSP_ARCH_AARCH64_ASIMD_GRAPHICS_H_
+#define PRIVATE_DSP_ARCH_AARCH64_ASIMD_GRAPHICS_H_
+
+#ifndef PRIVATE_DSP_ARCH_AARCH64_ASIMD_IMPL
+    #error "This header should not be included directly"
+#endif /* PRIVATE_DSP_ARCH_AARCH64_ASIMD_IMPL */
+
+#include <private/dsp/arch/aarch64/asimd/graphics/pixelfmt.h>
+
+#endif /* PRIVATE_DSP_ARCH_AARCH64_ASIMD_GRAPHICS_H_ */

include/private/dsp/arch/aarch64/asimd/graphics/pixelfmt.h

Lines changed: 135 additions & 0 deletions

@@ -0,0 +1,135 @@
+/*
+ * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
+ * (C) 2020 Vladimir Sadovnikov <[email protected]>
+ *
+ * This file is part of lsp-dsp-lib
+ * Created on: 11 Oct 2020
+ *
+ * lsp-dsp-lib is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * any later version.
+ *
+ * lsp-dsp-lib is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with lsp-dsp-lib. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#ifndef PRIVATE_DSP_ARCH_AARCH64_ASIMD_GRAPHICS_PIXELFMT_H_
+#define PRIVATE_DSP_ARCH_AARCH64_ASIMD_GRAPHICS_PIXELFMT_H_
+
+#ifndef PRIVATE_DSP_ARCH_AARCH64_ASIMD_IMPL
+    #error "This header should not be included directly"
+#endif /* PRIVATE_DSP_ARCH_AARCH64_ASIMD_IMPL */
+
+namespace lsp
+{
+    namespace asimd
+    {
+        IF_ARCH_AARCH64(
+            static const uint32_t abgr32_to_bgrff32_const[] __lsp_aligned32 =
+            {
+                LSP_DSP_VEC8(0xff000000)
+            };
+        );
+
+        void abgr32_to_bgrff32(void *dst, const void *src, size_t count)
+        {
+            ARCH_AARCH64_ASM
+            (
+                __ASM_EMIT("ldp q16, q17, [%[MASK]]")
+                __ASM_EMIT("subs %[count], %[count], #32")
+                __ASM_EMIT("ldp q18, q19, [%[MASK]]")
+                __ASM_EMIT("b.lo 2f")
+                // 32x blocks
+                __ASM_EMIT("ldp q20, q21, [%[MASK]]")
+                __ASM_EMIT("ldp q22, q23, [%[MASK]]")
+                __ASM_EMIT("1:")
+                __ASM_EMIT("ldp q0, q1, [%[src], #0x00]")
+                __ASM_EMIT("ldp q2, q3, [%[src], #0x20]")
+                __ASM_EMIT("ldp q4, q5, [%[src], #0x40]")
+                __ASM_EMIT("ldp q6, q7, [%[src], #0x60]")
+                __ASM_EMIT("sri v16.4s, v0.4s, #8")
+                __ASM_EMIT("sri v17.4s, v1.4s, #8")
+                __ASM_EMIT("sri v18.4s, v2.4s, #8")
+                __ASM_EMIT("sri v19.4s, v3.4s, #8")
+                __ASM_EMIT("sri v20.4s, v4.4s, #8")
+                __ASM_EMIT("sri v21.4s, v5.4s, #8")
+                __ASM_EMIT("sri v22.4s, v6.4s, #8")
+                __ASM_EMIT("sri v23.4s, v7.4s, #8")
+                __ASM_EMIT("stp q16, q17, [%[dst], #0x00]")
+                __ASM_EMIT("stp q18, q19, [%[dst], #0x20]")
+                __ASM_EMIT("stp q20, q21, [%[dst], #0x40]")
+                __ASM_EMIT("stp q22, q23, [%[dst], #0x60]")
+                __ASM_EMIT("add %[src], %[src], #0x80")
+                __ASM_EMIT("subs %[count], %[count], #32")
+                __ASM_EMIT("add %[dst], %[dst], #0x80")
+                __ASM_EMIT("b.hs 1b")
+                // 16x block
+                __ASM_EMIT("2:")
+                __ASM_EMIT("adds %[count], %[count], #16")
+                __ASM_EMIT("b.lt 4f")
+                __ASM_EMIT("ldp q0, q1, [%[src], #0x00]")
+                __ASM_EMIT("ldp q2, q3, [%[src], #0x20]")
+                __ASM_EMIT("sri v16.4s, v0.4s, #8")
+                __ASM_EMIT("sri v17.4s, v1.4s, #8")
+                __ASM_EMIT("sri v18.4s, v2.4s, #8")
+                __ASM_EMIT("sri v19.4s, v3.4s, #8")
+                __ASM_EMIT("stp q16, q17, [%[dst], #0x00]")
+                __ASM_EMIT("stp q18, q19, [%[dst], #0x20]")
+                __ASM_EMIT("add %[src], %[src], #0x40")
+                __ASM_EMIT("sub %[count], %[count], #16")
+                __ASM_EMIT("add %[dst], %[dst], #0x40")
+                // 8x block
+                __ASM_EMIT("4:")
+                __ASM_EMIT("adds %[count], %[count], #8")
+                __ASM_EMIT("b.lt 6f")
+                __ASM_EMIT("ldp q0, q1, [%[src], #0x00]")
+                __ASM_EMIT("sri v16.4s, v0.4s, #8")
+                __ASM_EMIT("sri v17.4s, v1.4s, #8")
+                __ASM_EMIT("stp q16, q17, [%[dst], #0x00]")
+                __ASM_EMIT("add %[src], %[src], #0x20")
+                __ASM_EMIT("add %[dst], %[dst], #0x20")
+                __ASM_EMIT("sub %[count], %[count], #8")
+                // 4x block
+                __ASM_EMIT("6:")
+                __ASM_EMIT("adds %[count], %[count], #4")
+                __ASM_EMIT("b.lt 8f")
+                __ASM_EMIT("ldr q0, [%[src], #0x00]")
+                __ASM_EMIT("sri v16.4s, v0.4s, #8")
+                __ASM_EMIT("str q16, [%[dst], #0x00]")
+                __ASM_EMIT("add %[src], %[src], #0x10")
+                __ASM_EMIT("add %[dst], %[dst], #0x10")
+                __ASM_EMIT("sub %[count], %[count], #4")
+                // 1x blocks
+                __ASM_EMIT("8:")
+                __ASM_EMIT("adds %[count], %[count], #3")
+                __ASM_EMIT("b.lt 10f")
+                __ASM_EMIT("9:")
+                __ASM_EMIT("ld1r {v0.4s}, [%[src]]")
+                __ASM_EMIT("sri v16.4s, v0.4s, #8")
+                __ASM_EMIT("subs %[count], %[count], #1")
+                __ASM_EMIT("st1 {v16.s}[0], [%[dst]]")
+                __ASM_EMIT("add %[src], %[src], #0x04")
+                __ASM_EMIT("add %[dst], %[dst], #0x04")
+                __ASM_EMIT("b.ge 9b")
+                __ASM_EMIT("10:")
+
+                : [dst] "+r" (dst), [src] "+r" (src),
+                  [count] "+r" (count)
+                : [MASK] "r" (&abgr32_to_bgrff32_const[0])
+                : "cc", "memory",
+                  "q0", "q1", "q2", "q3",
+                  "q4", "q5", "q6", "q7",
+                  "q16", "q17", "q18", "q19",
+                  "q20", "q21", "q22", "q23"
+            );
+        }
+    }
+}
+
+#endif /* PRIVATE_DSP_ARCH_AARCH64_ASIMD_GRAPHICS_PIXELFMT_H_ */
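
A note on how the kernel above works: q16-q23 are pre-loaded with the 0xff000000 mask and then double as result registers. SRI (shift right and insert) keeps the top 8 bits of the destination lane, so every result carries a 0xFF top byte and the same registers remain valid masks for the next iteration, which is why they are never reloaded inside the loop. A rough per-lane C++ model of the operation (the helper name sri32 is made up for illustration):

    #include <cstdint>

    // Per-lane model of "sri vd.4s, vn.4s, #8": shift the source lane right by 8
    // bits and insert it below the top 8 bits of the destination, which are kept.
    static inline uint32_t sri32(uint32_t vd, uint32_t vn, unsigned shift = 8)
    {
        const uint32_t keep = ~(~0u >> shift);      // 0xFF000000 for shift == 8
        return (vd & keep) | (vn >> shift);
    }

    // With vd pre-loaded as 0xFF000000 this is exactly (vn >> 8) | 0xFF000000,
    // and the result's top byte is again 0xFF, so the same register can be
    // reused as the mask on the next iteration without reloading it.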

include/private/dsp/arch/arm/neon-d32/graphics.h

Lines changed: 9 additions & 121 deletions
@@ -26,6 +26,8 @@
     #error "This header should not be included directly"
 #endif /* PRIVATE_DSP_ARCH_ARM_NEON_D32_IMPL */
 
+#include <private/dsp/arch/arm/neon-d32/graphics/pixelfmt.h>
+
 namespace lsp
 {
     namespace neon_d32
@@ -966,120 +968,6 @@ namespace lsp
             );
         }
 
-        void rgba32_to_bgra32(void *dst, const void *src, size_t count)
-        {
-            IF_ARCH_ARM(
-                uint32_t mask;
-                uint32_t t1, t2;
-            );
-
-            ARCH_ARM_ASM(
-                // 64x blocks
-                __ASM_EMIT("subs %[count], $64")
-                __ASM_EMIT("blo 2f")
-                __ASM_EMIT("1:")
-                __ASM_EMIT("vld4.8 {q0-q1}, [%[src]]!")     // d0 = R, d1 = G, d2 = B, d3 = A
-                __ASM_EMIT("vld4.8 {q2-q3}, [%[src]]!")
-                __ASM_EMIT("vswp d0, d2")
-                __ASM_EMIT("vld4.8 {q4-q5}, [%[src]]!")
-                __ASM_EMIT("vst4.8 {q0-q1}, [%[dst]]!")
-                __ASM_EMIT("vswp d4, d6")
-                __ASM_EMIT("vld4.8 {q6-q7}, [%[src]]!")
-                __ASM_EMIT("vst4.8 {q2-q3}, [%[dst]]!")
-                __ASM_EMIT("vswp d8, d10")
-                __ASM_EMIT("vld4.8 {q8-q9}, [%[src]]!")
-                __ASM_EMIT("vst4.8 {q4-q5}, [%[dst]]!")
-                __ASM_EMIT("vswp d12, d14")
-                __ASM_EMIT("vld4.8 {q10-q11}, [%[src]]!")
-                __ASM_EMIT("vst4.8 {q6-q7}, [%[dst]]!")
-                __ASM_EMIT("vswp d16, d18")
-                __ASM_EMIT("vld4.8 {q12-q13}, [%[src]]!")
-                __ASM_EMIT("vst4.8 {q8-q9}, [%[dst]]!")
-                __ASM_EMIT("vswp d20, d22")
-                __ASM_EMIT("vld4.8 {q14-q15}, [%[src]]!")
-                __ASM_EMIT("vst4.8 {q10-q11}, [%[dst]]!")
-                __ASM_EMIT("vswp d24, d26")
-                __ASM_EMIT("vswp d28, d30")
-                __ASM_EMIT("vst4.8 {q12-q13}, [%[dst]]!")
-                __ASM_EMIT("vst4.8 {q14-q15}, [%[dst]]!")
-                __ASM_EMIT("subs %[count], $64")
-                __ASM_EMIT("bhs 1b")
-
-                // 32x blocks
-                __ASM_EMIT("2:")
-                __ASM_EMIT("adds %[count], $32")
-                __ASM_EMIT("blt 4f")
-                __ASM_EMIT("vld4.8 {q0-q1}, [%[src]]!")     // d0 = R, d1 = G, d2 = B, d3 = A
-                __ASM_EMIT("vld4.8 {q2-q3}, [%[src]]!")
-                __ASM_EMIT("vswp d0, d2")
-                __ASM_EMIT("vld4.8 {q4-q5}, [%[src]]!")
-                __ASM_EMIT("vst4.8 {q0-q1}, [%[dst]]!")
-                __ASM_EMIT("vswp d4, d6")
-                __ASM_EMIT("vld4.8 {q6-q7}, [%[src]]!")
-                __ASM_EMIT("vst4.8 {q2-q3}, [%[dst]]!")
-                __ASM_EMIT("vswp d8, d10")
-                __ASM_EMIT("vswp d12, d14")
-                __ASM_EMIT("vst4.8 {q4-q5}, [%[dst]]!")
-                __ASM_EMIT("vst4.8 {q6-q7}, [%[dst]]!")
-                __ASM_EMIT("sub %[count], $32")
-
-                // 16x blocks
-                __ASM_EMIT("4:")
-                __ASM_EMIT("adds %[count], $16")
-                __ASM_EMIT("blt 6f")
-                __ASM_EMIT("vld4.8 {q0-q1}, [%[src]]!")     // d0 = R, d1 = G, d2 = B, d3 = A
-                __ASM_EMIT("vld4.8 {q2-q3}, [%[src]]!")
-                __ASM_EMIT("vswp d0, d2")
-                __ASM_EMIT("vswp d4, d6")
-                __ASM_EMIT("vst4.8 {q0-q1}, [%[dst]]!")
-                __ASM_EMIT("vst4.8 {q2-q3}, [%[dst]]!")
-                __ASM_EMIT("sub %[count], $16")
-
-                // 8x blocks
-                __ASM_EMIT("6:")
-                __ASM_EMIT("adds %[count], $8")
-                __ASM_EMIT("blt 8f")
-                __ASM_EMIT("vld4.8 {q0-q1}, [%[src]]!")     // d0 = R, d1 = G, d2 = B, d3 = A
-                __ASM_EMIT("vswp d0, d2")
-                __ASM_EMIT("vst4.8 {q0-q1}, [%[dst]]!")
-                __ASM_EMIT("sub %[count], $8")
-
-                // 4x blocks
-                __ASM_EMIT("8:")
-                __ASM_EMIT("adds %[count], $4")
-                __ASM_EMIT("blt 10f")
-                __ASM_EMIT("vld2.8 {q0}, [%[src]]!")        // d0 = RB, d1 = GA
-                __ASM_EMIT("vshl.i16 d2, d0, $8")
-                __ASM_EMIT("vshr.u16 d0, d0, $8")
-                __ASM_EMIT("vorr d0, d2")
-                __ASM_EMIT("vst2.8 {q0}, [%[dst]]!")
-                __ASM_EMIT("sub %[count], $4")
-
-                // 1x blocks
-                __ASM_EMIT("10:")
-                __ASM_EMIT("adds %[count], $3")
-                __ASM_EMIT("blt 12f")
-                __ASM_EMIT("mov %[mask], $0xff")
-                __ASM_EMIT("orr %[mask], $0xff0000")            // mask = ff 00 ff 00
-                __ASM_EMIT("11:")
-                __ASM_EMIT("ldr %[t1], [%[src]], $4")           // t1 = R G B A
-                __ASM_EMIT("and %[t2], %[t1], %[mask]")         // t2 = R 0 B 0
-                __ASM_EMIT("and %[t1], %[t1], %[mask], lsl $8") // t1 = 0 G 0 A
-                __ASM_EMIT("orr %[t1], %[t1], %[t2], ror $16")  // t1 = B G R A
-                __ASM_EMIT("str %[t1], [%[dst]], $4")
-                __ASM_EMIT("subs %[count], $1")
-                __ASM_EMIT("bge 11b")
-
-                __ASM_EMIT("12:")
-                : [src] "+r" (src), [dst] "+r" (dst), [count] "+r" (count),
-                  [t1] "=&r" (t1), [t2] "=&r" (t2), [mask] "=&r" (mask)
-                :
-                : "cc", "memory",
-                  "q0", "q1", "q2", "q3" , "q4", "q5", "q6", "q7",
-                  "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-            );
-        }
-
         #define FILL4_CORE \
             __ASM_EMIT("vld1.32 {q0}, [%[c4]]")     /* q0 = c0 c1 c2 c3 */ \
             __ASM_EMIT("vmov q1, q0")               /* q1 = c0 c1 c2 c3 */ \
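
The routine removed here swaps the R and B bytes of every 32-bit pixel and leaves G and A untouched, as the scalar tail of the deleted assembly spells out (mask 0x00ff00ff, rotate by 16). With the new graphics/pixelfmt.h include added above, the pixel-format conversions presumably now live in that dedicated header rather than being dropped. A minimal little-endian scalar sketch of the same swap, under a made-up name:

    #include <cstdint>
    #include <cstddef>
    #include <cstring>

    // Hypothetical scalar reference for the R<->B swap (little-endian words):
    // an RGBA32 pixel (bytes R, G, B, A in memory) loads as 0xAABBGGRR;
    // keep A and G, exchange R and B.
    static void rgba32_to_bgra32_ref(void *dst, const void *src, size_t count)
    {
        const uint8_t *s = static_cast<const uint8_t *>(src);
        uint8_t *d       = static_cast<uint8_t *>(dst);

        for (size_t i = 0; i < count; ++i, s += 4, d += 4)
        {
            uint32_t x;
            std::memcpy(&x, s, sizeof(x));          // x = 0xAABBGGRR
            x = (x & 0xFF00FF00u)                   // keep A and G in place
              | ((x >> 16) & 0x000000FFu)           // move B down into the R slot
              | ((x & 0x000000FFu) << 16);          // move R up into the B slot
            std::memcpy(d, &x, sizeof(x));          // stores B, G, R, A
        }
    }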
@@ -1465,13 +1353,13 @@
             __ASM_EMIT("vmul.f32 q4, q6, q9")       /* q4 = D/X */ \
             __ASM_EMIT("veor q5, q5")               /* q5 = 0 */ \
             \
-            __ASM_EMIT("vclt.f32 q6, q1, q13")      /* q6 = [L < 1] */ \
+            __ASM_EMIT("vcle.f32 q6, q1, q14")      /* q6 = [L <= 0.5] */ \
             __ASM_EMIT("vceq.f32 q7, q1, q5")       /* q7 = [L == 0] */ \
-            __ASM_EMIT("vcgt.f32 q8, q1, q13")      /* q8 = [L > 1] */ \
-            __ASM_EMIT("vbit q6, q5, q7")           /* q6 = [L < 1] & [L != 0] */ \
-            __ASM_EMIT("vand q8, q8, q4")           /* q8 = D/X & [L > 1] */ \
-            __ASM_EMIT("vand q6, q6, q2")           /* q6 = D/L & [L < 1] & [L != 0] */ \
-            __ASM_EMIT("vorr q2, q8, q6")           /* q2 = S = (D/L & [L < 1] & [L != 0]) | (D/X & [L > 1]) */ \
+            __ASM_EMIT("vcgt.f32 q8, q1, q14")      /* q8 = [L > 0.5] */ \
+            __ASM_EMIT("vbit q6, q5, q7")           /* q6 = [L <= 0.5] & [L != 0] */ \
+            __ASM_EMIT("vand q8, q8, q4")           /* q8 = D/X & [L > 0.5] */ \
+            __ASM_EMIT("vand q6, q6, q2")           /* q6 = D/L & [L <= 0.5] & [L != 0] */ \
+            __ASM_EMIT("vorr q2, q8, q6")           /* q2 = S = (D/L & [L <= 0.5] & [L != 0]) | (D/X & [L > 0.5]) */ \
             __ASM_EMIT("vmul.f32 q0, q0, q15")      /* q0 = H * 1/6 */ \
             __ASM_EMIT("vmul.f32 q2, q2, q14")      /* q2 = S * 1/2 */ \
@@ -1497,7 +1385,7 @@
                 H = (R - G) / d + 4.0f;
 
             // Calculate saturation
-            if (L < 1.0f)
+            if (L < 0.5f)
                 S = (L != 0.0f) ? d / L : 0.0f;
             else
                 S = (L != 1.0f) ? d / (1.0f - L) : 0.0f;
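
Both hunks above apply the same correction: the saturation branch must test lightness against 0.5, not 1.0. For RGB to HSL with d = max - min and L = (max + min) / 2, saturation is d / (2L) when L <= 0.5 and d / (2 - 2L) otherwise; the trailing factor of 1/2 is what the "S * 1/2" multiply in the macro provides. A compact scalar sketch of the corrected step, with a made-up helper name and assuming channels normalized to [0, 1]:

    #include <algorithm>

    // Hypothetical scalar sketch of the corrected saturation step; not the
    // library's routine. r, g, b are assumed to lie in [0, 1].
    static float hsl_saturation(float r, float g, float b)
    {
        const float M = std::max(r, std::max(g, b));
        const float m = std::min(r, std::min(g, b));
        const float d = M - m;              // chroma
        const float L = 0.5f * (M + m);     // lightness

        if (d == 0.0f)                      // grey pixel: saturation is zero
            return 0.0f;

        // The fix in this commit: branch on L <= 0.5 instead of L < 1.
        return (L <= 0.5f) ? d / (2.0f * L)
                           : d / (2.0f - 2.0f * L);
    }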
