Skip to content

Commit 53839f2

Browse files
committed
Merge branch 'avx512' into devel
* Additional optimizations of parallel mathematics functions with AVX-512 instruction set.
2 parents 2e5735a + 9a1b557 commit 53839f2

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

71 files changed

+7205
-72
lines changed

CHANGELOG

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
* Implemented SIMD-optimized gate functions.
88
* Fixed bug in AVX-512 implementation of mid/side conversion functions.
99
* AVX512 optimization of packed complex functions.
10+
* Additional optimizations of parallel mathematics functions with AVX-512 instruction set.
1011

1112
=== 1.0.18 ===
1213
* Fixed compilation regression for 32-bit Clang compiler.

include/private/dsp/arch/generic/pmath/abs_vv.h

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
2-
* Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
3-
* (C) 2020 Vladimir Sadovnikov <[email protected]>
2+
* Copyright (C) 2023 Linux Studio Plugins Project <https://lsp-plug.in/>
3+
* (C) 2023 Vladimir Sadovnikov <[email protected]>
44
*
55
* This file is part of lsp-dsp-lib
66
* Created on: 31 мар. 2020 г.
@@ -33,87 +33,87 @@ namespace lsp
3333
void abs1(float *dst, size_t count)
3434
{
3535
for (size_t i=0; i<count; ++i)
36-
dst[i] = fabs(dst[i]);
36+
dst[i] = fabsf(dst[i]);
3737
}
3838

3939
void abs2(float *dst, const float *src, size_t count)
4040
{
4141
for (size_t i=0; i<count; ++i)
42-
dst[i] = fabs(src[i]);
42+
dst[i] = fabsf(src[i]);
4343
}
4444

4545
void abs_add2(float *dst, const float *src, size_t count)
4646
{
4747
for (size_t i=0; i<count; ++i)
48-
dst[i] += fabs(src[i]);
48+
dst[i] += fabsf(src[i]);
4949
}
5050

5151
void abs_sub2(float *dst, const float *src, size_t count)
5252
{
5353
for (size_t i=0; i<count; ++i)
54-
dst[i] -= fabs(src[i]);
54+
dst[i] -= fabsf(src[i]);
5555
}
5656

5757
void abs_rsub2(float *dst, const float *src, size_t count)
5858
{
5959
for (size_t i=0; i<count; ++i)
60-
dst[i] = fabs(src[i]) - dst[i];
60+
dst[i] = fabsf(src[i]) - dst[i];
6161
}
6262

6363
void abs_mul2(float *dst, const float *src, size_t count)
6464
{
6565
for (size_t i=0; i<count; ++i)
66-
dst[i] *= fabs(src[i]);
66+
dst[i] *= fabsf(src[i]);
6767
}
6868

6969
void abs_div2(float *dst, const float *src, size_t count)
7070
{
7171
for (size_t i=0; i<count; ++i)
72-
dst[i] /= fabs(src[i]);
72+
dst[i] /= fabsf(src[i]);
7373
}
7474

7575
void abs_rdiv2(float *dst, const float *src, size_t count)
7676
{
7777
for (size_t i=0; i<count; ++i)
78-
dst[i] = fabs(src[i]) / dst[i];
78+
dst[i] = fabsf(src[i]) / dst[i];
7979
}
8080

8181
void abs_add3(float *dst, const float *src1, const float *src2, size_t count)
8282
{
8383
for (size_t i=0; i<count; ++i)
84-
dst[i] = src1[i] + fabs(src2[i]);
84+
dst[i] = src1[i] + fabsf(src2[i]);
8585
}
8686

8787
void abs_sub3(float *dst, const float *src1, const float *src2, size_t count)
8888
{
8989
for (size_t i=0; i<count; ++i)
90-
dst[i] = src1[i] - fabs(src2[i]);
90+
dst[i] = src1[i] - fabsf(src2[i]);
9191
}
9292

9393
void abs_rsub3(float *dst, const float *src1, const float *src2, size_t count)
9494
{
9595
for (size_t i=0; i<count; ++i)
96-
dst[i] = fabs(src2[i]) - src1[i];
96+
dst[i] = fabsf(src2[i]) - src1[i];
9797
}
9898

9999
void abs_mul3(float *dst, const float *src1, const float *src2, size_t count)
100100
{
101101
for (size_t i=0; i<count; ++i)
102-
dst[i] = src1[i] * fabs(src2[i]);
102+
dst[i] = src1[i] * fabsf(src2[i]);
103103
}
104104

105105
void abs_div3(float *dst, const float *src1, const float *src2, size_t count)
106106
{
107107
for (size_t i=0; i<count; ++i)
108-
dst[i] = src1[i] / fabs(src2[i]);
108+
dst[i] = src1[i] / fabsf(src2[i]);
109109
}
110110

111111
void abs_rdiv3(float *dst, const float *src1, const float *src2, size_t count)
112112
{
113113
for (size_t i=0; i<count; ++i)
114-
dst[i] = fabs(src2[i]) / src1[i];
114+
dst[i] = fabsf(src2[i]) / src1[i];
115115
}
116-
}
117-
}
116+
} /* namespace generic */
117+
} /* namespace lsp */
118118

119119
#endif /* PRIVATE_DSP_ARCH_GENERIC_PMATH_ABS_VV_H_ */

include/private/dsp/arch/x86/avx/pmath/fmop_kx.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
2-
* Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
3-
* (C) 2020 Vladimir Sadovnikov <[email protected]>
2+
* Copyright (C) 2023 Linux Studio Plugins Project <https://lsp-plug.in/>
3+
* (C) 2023 Vladimir Sadovnikov <[email protected]>
44
*
55
* This file is part of lsp-dsp-lib
66
* Created on: 31 мар. 2020 г.
@@ -30,7 +30,7 @@ namespace lsp
3030
{
3131
namespace avx
3232
{
33-
#define OP_DSEL(a, b) a
33+
#define OP_DSEL(a, b) a
3434
#define OP_RSEL(a, b) b
3535

3636
#define FMADDSUB_K3_CORE(DST, SRC1, SRC2, OP) \
@@ -659,7 +659,7 @@ namespace lsp
659659

660660
#undef OP_DSEL
661661
#undef OP_RSEL
662-
}
663-
}
662+
} /* namespace avx */
663+
} /* namespace lsp */
664664

665665
#endif /* PRIVATE_DSP_ARCH_X86_AVX_PMATH_FMOP_KX_H_ */

include/private/dsp/arch/x86/avx/pmath/normalize.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ namespace lsp
3232
{
3333

3434
IF_ARCH_X86(
35-
static const uint32_t normalize_const[] __lsp_aligned16 =
35+
static const uint32_t normalize_const[] __lsp_aligned32 =
3636
{
3737
LSP_DSP_VEC8(0x7fffffff),
3838
LSP_DSP_VEC8(0x3f800000) /* 1.0f */

include/private/dsp/arch/x86/avx/pmath/ssqrt.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,8 @@ namespace lsp
6363
__ASM_EMIT("vsqrtps %%ymm1, %%ymm1") \
6464
__ASM_EMIT("vmovups %%ymm0, 0x00(%[" DST "], %[off])") \
6565
__ASM_EMIT("vmovups %%ymm1, 0x20(%[" DST "], %[off])") \
66-
__ASM_EMIT("add $0x40, %[off]") \
6766
__ASM_EMIT("sub $16, %[count]") \
68-
__ASM_EMIT("jae 1b") \
67+
__ASM_EMIT("add $0x40, %[off]") \
6968
__ASM_EMIT("4:") \
7069
/* 8x block */ \
7170
__ASM_EMIT("add $8, %[count]") \

include/private/dsp/arch/x86/avx2/pmath/log.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2358,7 +2358,6 @@ namespace lsp
23582358
__ASM_EMIT("vmovups %%ymm0, 0x00(%[dst])")
23592359
__ASM_EMIT("add $0x20, %[dst]")
23602360
__ASM_EMIT("sub $8, %[count]")
2361-
__ASM_EMIT("jae 1b")
23622361
__ASM_EMIT("6:")
23632362
// x4 block
23642363
__ASM_EMIT("add $4, %[count]")
@@ -2430,7 +2429,6 @@ namespace lsp
24302429
__ASM_EMIT("vmovups %%ymm0, 0x00(%[dst])")
24312430
__ASM_EMIT("add $0x20, %[dst]")
24322431
__ASM_EMIT("sub $8, %[count]")
2433-
__ASM_EMIT("jae 1b")
24342432
__ASM_EMIT("4:")
24352433
// x4 block
24362434
__ASM_EMIT("add $4, %[count]")

include/private/dsp/arch/x86/avx512/pmath.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,16 @@
2626
#error "This header should not be included directly"
2727
#endif /* PRIVATE_DSP_ARCH_X86_AVX512_IMPL */
2828

29+
#include <private/dsp/arch/x86/avx512/pmath/abs_vv.h>
30+
#include <private/dsp/arch/x86/avx512/pmath/exp.h>
31+
#include <private/dsp/arch/x86/avx512/pmath/fmop_kx.h>
32+
#include <private/dsp/arch/x86/avx512/pmath/fmop_vv.h>
33+
#include <private/dsp/arch/x86/avx512/pmath/log.h>
34+
#include <private/dsp/arch/x86/avx512/pmath/lramp.h>
35+
#include <private/dsp/arch/x86/avx512/pmath/minmax.h>
36+
#include <private/dsp/arch/x86/avx512/pmath/normalize.h>
37+
#include <private/dsp/arch/x86/avx512/pmath/op_kx.h>
38+
#include <private/dsp/arch/x86/avx512/pmath/op_vv.h>
2939
#include <private/dsp/arch/x86/avx512/pmath/sqr.h>
3040
#include <private/dsp/arch/x86/avx512/pmath/ssqrt.h>
3141

0 commit comments

Comments
 (0)