Skip to content

Commit f107893

Browse files
committed
AVX-512 implementation of complex_div2, complex_rdiv2, complex_div3
1 parent 5a62a05 commit f107893

File tree

7 files changed

+492
-79
lines changed

7 files changed

+492
-79
lines changed

include/private/dsp/arch/x86/avx/complex.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
2-
* Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
3-
* (C) 2020 Vladimir Sadovnikov <[email protected]>
2+
* Copyright (C) 2023 Linux Studio Plugins Project <https://lsp-plug.in/>
3+
* (C) 2023 Vladimir Sadovnikov <[email protected]>
44
*
55
* This file is part of lsp-dsp-lib
66
* Created on: 31 мар. 2020 г.
@@ -507,7 +507,7 @@ namespace lsp
507507
[CC] "o" (complex_div_const)
508508
: "cc", "memory",
509509
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
510-
"%xmm4", "%xmm5"
510+
"%xmm4", "%xmm5", "%xmm6", "%xmm7"
511511
);
512512
}
513513

include/private/dsp/arch/x86/avx512/complex.h

Lines changed: 311 additions & 0 deletions
Large diffs are not rendered by default.

src/main/x86/avx512.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,9 @@
8080
CEXPORT1(vl, complex_mul2);
8181
CEXPORT1(vl, complex_mul3);
8282
CEXPORT1(vl, complex_mod);
83+
CEXPORT1(vl, complex_div2);
84+
CEXPORT1(vl, complex_rdiv2);
85+
CEXPORT1(vl, complex_div3);
8386
}
8487
} /* namespace avx512 */
8588
} /* namespace lsp */

src/test/ptest/complex/div2.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
2-
* Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
3-
* (C) 2020 Vladimir Sadovnikov <[email protected]>
2+
* Copyright (C) 2023 Linux Studio Plugins Project <https://lsp-plug.in/>
3+
* (C) 2023 Vladimir Sadovnikov <[email protected]>
44
*
55
* This file is part of lsp-dsp-lib
66
* Created on: 31 мар. 2020 г.
@@ -51,6 +51,12 @@ namespace lsp
5151
void complex_div2_fma3(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t count);
5252
void complex_rdiv2_fma3(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t count);
5353
}
54+
55+
namespace avx512
56+
{
57+
void complex_div2(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t count);
58+
void complex_rdiv2(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t count);
59+
}
5460
)
5561

5662
IF_ARCH_ARM(
@@ -119,6 +125,8 @@ PTEST_BEGIN("dsp.complex", div2, 5, 1000)
119125
IF_ARCH_X86(CALL(avx::complex_rdiv2));
120126
IF_ARCH_X86(CALL(avx::complex_div2_fma3));
121127
IF_ARCH_X86(CALL(avx::complex_rdiv2_fma3));
128+
IF_ARCH_X86(CALL(avx512::complex_div2));
129+
IF_ARCH_X86(CALL(avx512::complex_rdiv2));
122130

123131
IF_ARCH_ARM(CALL(neon_d32::complex_div2));
124132
IF_ARCH_ARM(CALL(neon_d32::complex_rdiv2));

src/test/ptest/complex/div3.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
2-
* Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
3-
* (C) 2020 Vladimir Sadovnikov <[email protected]>
2+
* Copyright (C) 2023 Linux Studio Plugins Project <https://lsp-plug.in/>
3+
* (C) 2023 Vladimir Sadovnikov <[email protected]>
44
*
55
* This file is part of lsp-dsp-lib
66
* Created on: 31 мар. 2020 г.
@@ -46,6 +46,11 @@ namespace lsp
4646
void complex_div3(float *dst_re, float *dst_im, const float *t_re, const float *t_im, const float *b_re, const float *b_im, size_t count);
4747
void complex_div3_fma3(float *dst_re, float *dst_im, const float *t_re, const float *t_im, const float *b_re, const float *b_im, size_t count);
4848
}
49+
50+
namespace avx512
51+
{
52+
void complex_div3(float *dst_re, float *dst_im, const float *t_re, const float *t_im, const float *b_re, const float *b_im, size_t count);
53+
}
4954
)
5055

5156
IF_ARCH_ARM(
@@ -108,6 +113,7 @@ PTEST_BEGIN("dsp.complex", div3, 5, 1000)
108113
IF_ARCH_X86(CALL(sse::complex_div3));
109114
IF_ARCH_X86(CALL(avx::complex_div3));
110115
IF_ARCH_X86(CALL(avx::complex_div3_fma3));
116+
IF_ARCH_X86(CALL(avx512::complex_div3));
111117
IF_ARCH_ARM(CALL(neon_d32::complex_div3));
112118
IF_ARCH_AARCH64(CALL(asimd::complex_div3));
113119

src/test/utest/complex/div.cpp renamed to src/test/utest/complex/div2.cpp

Lines changed: 11 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
2-
* Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
3-
* (C) 2020 Vladimir Sadovnikov <[email protected]>
2+
* Copyright (C) 2023 Linux Studio Plugins Project <https://lsp-plug.in/>
3+
* (C) 2023 Vladimir Sadovnikov <[email protected]>
44
*
55
* This file is part of lsp-dsp-lib
66
* Created on: 31 мар. 2020 г.
@@ -29,26 +29,28 @@ namespace lsp
2929
{
3030
void complex_div2(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t count);
3131
void complex_rdiv2(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t count);
32-
void complex_div3(float *dst_re, float *dst_im, const float *t_re, const float *t_im, const float *b_re, const float *b_im, size_t count);
3332
}
3433

3534
IF_ARCH_X86(
3635
namespace sse
3736
{
3837
void complex_div2(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t count);
3938
void complex_rdiv2(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t count);
40-
void complex_div3(float *dst_re, float *dst_im, const float *t_re, const float *t_im, const float *b_re, const float *b_im, size_t count);
4139
}
4240

4341
namespace avx
4442
{
4543
void complex_div2(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t count);
4644
void complex_rdiv2(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t count);
47-
void complex_div3(float *dst_re, float *dst_im, const float *t_re, const float *t_im, const float *b_re, const float *b_im, size_t count);
4845

4946
void complex_div2_fma3(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t count);
5047
void complex_rdiv2_fma3(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t count);
51-
void complex_div3_fma3(float *dst_re, float *dst_im, const float *t_re, const float *t_im, const float *b_re, const float *b_im, size_t count);
48+
}
49+
50+
namespace avx512
51+
{
52+
void complex_div2(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t count);
53+
void complex_rdiv2(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t count);
5254
}
5355
)
5456

@@ -57,7 +59,6 @@ namespace lsp
5759
{
5860
void complex_div2(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t count);
5961
void complex_rdiv2(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t count);
60-
void complex_div3(float *dst_re, float *dst_im, const float *t_re, const float *t_im, const float *b_re, const float *b_im, size_t count);
6162
}
6263
)
6364

@@ -66,15 +67,13 @@ namespace lsp
6667
{
6768
void complex_div2(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t count);
6869
void complex_rdiv2(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t count);
69-
void complex_div3(float *dst_re, float *dst_im, const float *t_re, const float *t_im, const float *b_re, const float *b_im, size_t count);
7070
}
7171
)
7272
}
7373

7474
typedef void (* complex_div2_t) (float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t count);
75-
typedef void (* complex_div3_t) (float *dst_re, float *dst_im, const float *t_re, const float *t_im, const float *b_re, const float *b_im, size_t count);
7675

77-
UTEST_BEGIN("dsp.complex", div)
76+
UTEST_BEGIN("dsp.complex", div2)
7877

7978
void call(const char *text, size_t align, complex_div2_t func1, complex_div2_t func2)
8079
{
@@ -127,85 +126,25 @@ UTEST_BEGIN("dsp.complex", div)
127126
}
128127
}
129128

130-
void call(const char *text, size_t align, complex_div3_t func1, complex_div3_t func2)
131-
{
132-
if (!UTEST_SUPPORTED(func1))
133-
return;
134-
if (!UTEST_SUPPORTED(func2))
135-
return;
136-
137-
UTEST_FOREACH(count, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
138-
32, 33, 37, 48, 49, 64, 65, 0x3f, 100, 999, 0x1fff)
139-
{
140-
for (size_t mask=0; mask <= 0x3f; ++mask)
141-
{
142-
printf("Testing %s on input buffer of %d numbers, mask=0x%x...\n", text, int(count), int(mask));
143-
144-
FloatBuffer src1_re(count, align, mask & 0x01);
145-
FloatBuffer src1_im(count, align, mask & 0x02);
146-
src1_re.randomize_sign();
147-
src1_im.randomize_sign();
148-
FloatBuffer src2_re(count, align, mask & 0x04);
149-
FloatBuffer src2_im(count, align, mask & 0x08);
150-
src2_re.randomize_sign();
151-
src2_im.randomize_sign();
152-
FloatBuffer dst1_re(count, align, mask & 0x10);
153-
FloatBuffer dst1_im(count, align, mask & 0x20);
154-
FloatBuffer dst2_re(count, align, mask & 0x10);
155-
FloatBuffer dst2_im(count, align, mask & 0x20);
156-
157-
// Call functions
158-
func1(dst1_re, dst1_im, src1_re, src1_im, src2_re, src2_im, count);
159-
func2(dst2_re, dst2_im, src1_re, src1_im, src2_re, src2_im, count);
160-
161-
UTEST_ASSERT_MSG(src1_re.valid(), "Source buffer 1 RE corrupted");
162-
UTEST_ASSERT_MSG(src1_im.valid(), "Source buffer 1 IM corrupted");
163-
UTEST_ASSERT_MSG(src2_re.valid(), "Source buffer 2 RE corrupted");
164-
UTEST_ASSERT_MSG(src2_im.valid(), "Source buffer 2 IM corrupted");
165-
UTEST_ASSERT_MSG(dst1_re.valid(), "Destination buffer 1 RE corrupted");
166-
UTEST_ASSERT_MSG(dst1_im.valid(), "Destination buffer 1 IM corrupted");
167-
UTEST_ASSERT_MSG(dst2_re.valid(), "Destination buffer 2 RE corrupted");
168-
UTEST_ASSERT_MSG(dst2_im.valid(), "Destination buffer 2 IM corrupted");
169-
170-
// Compare buffers
171-
if ((!dst2_re.equals_absolute(dst1_re, 1e-4)) || (!dst2_im.equals_absolute(dst1_im, 1e-4)))
172-
{
173-
src1_re.dump("src1_re");
174-
src1_im.dump("src1_im");
175-
src2_re.dump("src2_re");
176-
src2_im.dump("src2_im");
177-
dst1_re.dump("dst1_re");
178-
dst1_im.dump("dst1_im");
179-
dst2_re.dump("dst2_re");
180-
dst2_im.dump("dst2_im");
181-
UTEST_FAIL_MSG("Output of functions for test '%s' differs", text);
182-
}
183-
}
184-
}
185-
}
186-
187129
UTEST_MAIN
188130
{
189131
#define CALL(generic, func, align) \
190132
call(#func, align, generic, func)
191133

192134
IF_ARCH_X86(CALL(generic::complex_div2, sse::complex_div2, 16));
193135
IF_ARCH_X86(CALL(generic::complex_rdiv2, sse::complex_rdiv2, 16));
194-
IF_ARCH_X86(CALL(generic::complex_div3, sse::complex_div3, 16));
195136
IF_ARCH_X86(CALL(generic::complex_div2, avx::complex_div2, 32));
196137
IF_ARCH_X86(CALL(generic::complex_rdiv2, avx::complex_rdiv2, 32));
197-
IF_ARCH_X86(CALL(generic::complex_div3, avx::complex_div3, 32));
198138
IF_ARCH_X86(CALL(generic::complex_div2, avx::complex_div2_fma3, 32));
199139
IF_ARCH_X86(CALL(generic::complex_rdiv2, avx::complex_rdiv2_fma3, 32));
200-
IF_ARCH_X86(CALL(generic::complex_div3, avx::complex_div3_fma3, 32));
140+
IF_ARCH_X86(CALL(generic::complex_div2, avx512::complex_div2, 64));
141+
IF_ARCH_X86(CALL(generic::complex_rdiv2, avx512::complex_rdiv2, 64));
201142

202143
IF_ARCH_ARM(CALL(generic::complex_div2, neon_d32::complex_div2, 16));
203144
IF_ARCH_ARM(CALL(generic::complex_rdiv2, neon_d32::complex_rdiv2, 16));
204-
IF_ARCH_ARM(CALL(generic::complex_div3, neon_d32::complex_div3, 16));
205145

206146
IF_ARCH_AARCH64(CALL(generic::complex_div2, asimd::complex_div2, 16));
207147
IF_ARCH_AARCH64(CALL(generic::complex_rdiv2, asimd::complex_rdiv2, 16));
208-
IF_ARCH_AARCH64(CALL(generic::complex_div3, asimd::complex_div3, 16));
209148
}
210149

211150
UTEST_END;

src/test/utest/complex/div3.cpp

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
/*
2+
* Copyright (C) 2023 Linux Studio Plugins Project <https://lsp-plug.in/>
3+
* (C) 2023 Vladimir Sadovnikov <[email protected]>
4+
*
5+
* This file is part of lsp-dsp-lib
6+
* Created on: 9 сент. 2023 г.
7+
*
8+
* lsp-dsp-lib is free software: you can redistribute it and/or modify
9+
* it under the terms of the GNU Lesser General Public License as published by
10+
* the Free Software Foundation, either version 3 of the License, or
11+
* any later version.
12+
*
13+
* lsp-dsp-lib is distributed in the hope that it will be useful,
14+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
15+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16+
* GNU Lesser General Public License for more details.
17+
*
18+
* You should have received a copy of the GNU Lesser General Public License
19+
* along with lsp-dsp-lib. If not, see <https://www.gnu.org/licenses/>.
20+
*/
21+
22+
#include <lsp-plug.in/common/types.h>
23+
#include <lsp-plug.in/test-fw/utest.h>
24+
#include <lsp-plug.in/test-fw/FloatBuffer.h>
25+
26+
namespace lsp
27+
{
28+
namespace generic
29+
{
30+
void complex_div3(float *dst_re, float *dst_im, const float *t_re, const float *t_im, const float *b_re, const float *b_im, size_t count);
31+
}
32+
33+
IF_ARCH_X86(
34+
namespace sse
35+
{
36+
void complex_div3(float *dst_re, float *dst_im, const float *t_re, const float *t_im, const float *b_re, const float *b_im, size_t count);
37+
}
38+
39+
namespace avx
40+
{
41+
void complex_div3(float *dst_re, float *dst_im, const float *t_re, const float *t_im, const float *b_re, const float *b_im, size_t count);
42+
void complex_div3_fma3(float *dst_re, float *dst_im, const float *t_re, const float *t_im, const float *b_re, const float *b_im, size_t count);
43+
}
44+
45+
namespace avx512
46+
{
47+
void complex_div3(float *dst_re, float *dst_im, const float *t_re, const float *t_im, const float *b_re, const float *b_im, size_t count);
48+
}
49+
)
50+
51+
IF_ARCH_ARM(
52+
namespace neon_d32
53+
{
54+
void complex_div3(float *dst_re, float *dst_im, const float *t_re, const float *t_im, const float *b_re, const float *b_im, size_t count);
55+
}
56+
)
57+
58+
IF_ARCH_AARCH64(
59+
namespace asimd
60+
{
61+
void complex_div3(float *dst_re, float *dst_im, const float *t_re, const float *t_im, const float *b_re, const float *b_im, size_t count);
62+
}
63+
)
64+
}
65+
66+
typedef void (* complex_div3_t) (float *dst_re, float *dst_im, const float *t_re, const float *t_im, const float *b_re, const float *b_im, size_t count);
67+
68+
UTEST_BEGIN("dsp.complex", div3)
69+
70+
void call(const char *text, size_t align, complex_div3_t func1, complex_div3_t func2)
71+
{
72+
if (!UTEST_SUPPORTED(func1))
73+
return;
74+
if (!UTEST_SUPPORTED(func2))
75+
return;
76+
77+
UTEST_FOREACH(count, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
78+
32, 33, 37, 48, 49, 64, 65, 0x3f, 100, 999, 0x1fff)
79+
{
80+
for (size_t mask=0; mask <= 0x3f; ++mask)
81+
{
82+
printf("Testing %s on input buffer of %d numbers, mask=0x%x...\n", text, int(count), int(mask));
83+
84+
FloatBuffer src1_re(count, align, mask & 0x01);
85+
FloatBuffer src1_im(count, align, mask & 0x02);
86+
src1_re.randomize_sign();
87+
src1_im.randomize_sign();
88+
FloatBuffer src2_re(count, align, mask & 0x04);
89+
FloatBuffer src2_im(count, align, mask & 0x08);
90+
src2_re.randomize_sign();
91+
src2_im.randomize_sign();
92+
FloatBuffer dst1_re(count, align, mask & 0x10);
93+
FloatBuffer dst1_im(count, align, mask & 0x20);
94+
FloatBuffer dst2_re(count, align, mask & 0x10);
95+
FloatBuffer dst2_im(count, align, mask & 0x20);
96+
97+
// Call functions
98+
func1(dst1_re, dst1_im, src1_re, src1_im, src2_re, src2_im, count);
99+
func2(dst2_re, dst2_im, src1_re, src1_im, src2_re, src2_im, count);
100+
101+
UTEST_ASSERT_MSG(src1_re.valid(), "Source buffer 1 RE corrupted");
102+
UTEST_ASSERT_MSG(src1_im.valid(), "Source buffer 1 IM corrupted");
103+
UTEST_ASSERT_MSG(src2_re.valid(), "Source buffer 2 RE corrupted");
104+
UTEST_ASSERT_MSG(src2_im.valid(), "Source buffer 2 IM corrupted");
105+
UTEST_ASSERT_MSG(dst1_re.valid(), "Destination buffer 1 RE corrupted");
106+
UTEST_ASSERT_MSG(dst1_im.valid(), "Destination buffer 1 IM corrupted");
107+
UTEST_ASSERT_MSG(dst2_re.valid(), "Destination buffer 2 RE corrupted");
108+
UTEST_ASSERT_MSG(dst2_im.valid(), "Destination buffer 2 IM corrupted");
109+
110+
// Compare buffers
111+
if ((!dst2_re.equals_absolute(dst1_re, 1e-4)) || (!dst2_im.equals_absolute(dst1_im, 1e-4)))
112+
{
113+
src1_re.dump("src1_re");
114+
src1_im.dump("src1_im");
115+
src2_re.dump("src2_re");
116+
src2_im.dump("src2_im");
117+
dst1_re.dump("dst1_re");
118+
dst1_im.dump("dst1_im");
119+
dst2_re.dump("dst2_re");
120+
dst2_im.dump("dst2_im");
121+
UTEST_FAIL_MSG("Output of functions for test '%s' differs", text);
122+
}
123+
}
124+
}
125+
}
126+
127+
UTEST_MAIN
128+
{
129+
#define CALL(generic, func, align) \
130+
call(#func, align, generic, func)
131+
132+
IF_ARCH_X86(CALL(generic::complex_div3, sse::complex_div3, 16));
133+
IF_ARCH_X86(CALL(generic::complex_div3, avx::complex_div3, 32));
134+
IF_ARCH_X86(CALL(generic::complex_div3, avx::complex_div3_fma3, 32));
135+
IF_ARCH_X86(CALL(generic::complex_div3, avx512::complex_div3, 64));
136+
137+
IF_ARCH_ARM(CALL(generic::complex_div3, neon_d32::complex_div3, 16));
138+
139+
IF_ARCH_AARCH64(CALL(generic::complex_div3, asimd::complex_div3, 16));
140+
}
141+
142+
UTEST_END;
143+
144+
145+
146+

0 commit comments

Comments
 (0)