Skip to content

Commit 2e5735a

Browse files
author
sadko
committed
Fixed bug in AVX-512 implementation of mid/side conversion functions
1 parent 96963f4 commit 2e5735a

File tree

2 files changed

+98
-97
lines changed

2 files changed

+98
-97
lines changed

CHANGELOG

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
=== 1.0.19 ===
66
* AVX2 optimization of search functions for maximum and minimum.
77
* Implemented SIMD-optimized gate functions.
8+
* Fixed bug in AVX-512 implementation of mid/side conversion functions.
89
* AVX512 optimization of packed complex functions.
910

1011
=== 1.0.18 ===

include/private/dsp/arch/x86/avx512/msmatrix.h

Lines changed: 97 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -138,102 +138,102 @@ namespace lsp
138138
);
139139
}
140140

141-
#define LR_TO_PART(P, L, R, OP) \
142-
__ASM_EMIT("xor %[off], %[off]") \
143-
__ASM_EMIT("vmovaps %[X_HALF], %%zmm7") \
144-
/* 64x blocks */ \
145-
__ASM_EMIT("sub $64, %[count]") \
146-
__ASM_EMIT("vmovaps %%zmm7, %%zmm6") \
147-
__ASM_EMIT("jb 2f") \
148-
__ASM_EMIT("1:") \
149-
__ASM_EMIT("vmovups 0x00(%[" L "], %[off]), %%ymm0") /* ymm0 = l */ \
150-
__ASM_EMIT("vmovups 0x40(%[" L "], %[off]), %%ymm1") \
151-
__ASM_EMIT("vmovups 0x80(%[" L "], %[off]), %%ymm2") \
152-
__ASM_EMIT("vmovups 0xc0(%[" L "], %[off]), %%ymm3") \
153-
__ASM_EMIT(OP "ps 0x00(%[" R "], %[off]), %%ymm0, %%ymm0") /* ymm0 = l op r */ \
154-
__ASM_EMIT(OP "ps 0x40(%[" R "], %[off]), %%ymm1, %%ymm1") \
155-
__ASM_EMIT(OP "ps 0x80(%[" R "], %[off]), %%ymm2, %%ymm2") \
156-
__ASM_EMIT(OP "ps 0xc0(%[" R "], %[off]), %%ymm3, %%ymm3") \
157-
__ASM_EMIT("vmulps %%ymm6, %%ymm0, %%ymm0") /* ymm0 = (l op r) * 0.5 */ \
158-
__ASM_EMIT("vmulps %%ymm7, %%ymm1, %%ymm1") \
159-
__ASM_EMIT("vmulps %%ymm6, %%ymm2, %%ymm2") \
160-
__ASM_EMIT("vmulps %%ymm7, %%ymm3, %%ymm3") \
161-
__ASM_EMIT("vmovups %%ymm0, 0x00(%[" P "], %[off])") \
162-
__ASM_EMIT("vmovups %%ymm1, 0x40(%[" P "], %[off])") \
163-
__ASM_EMIT("vmovups %%ymm2, 0x80(%[" P "], %[off])") \
164-
__ASM_EMIT("vmovups %%ymm3, 0xc0(%[" P "], %[off])") \
165-
__ASM_EMIT("add $0x100, %[off]") \
166-
__ASM_EMIT("sub $64, %[count]") \
167-
__ASM_EMIT("jae 1b") \
168-
__ASM_EMIT("2:") \
169-
/* 32x block */ \
170-
__ASM_EMIT("add $32, %[count]") \
171-
__ASM_EMIT("jl 4f") \
172-
__ASM_EMIT("vmovups 0x00(%[" L "], %[off]), %%ymm0") /* ymm0 = l */ \
173-
__ASM_EMIT("vmovups 0x20(%[" L "], %[off]), %%ymm1") \
174-
__ASM_EMIT("vmovups 0x40(%[" L "], %[off]), %%ymm2") \
175-
__ASM_EMIT("vmovups 0x60(%[" L "], %[off]), %%ymm3") \
176-
__ASM_EMIT(OP "ps 0x00(%[" R "], %[off]), %%ymm0, %%ymm0") /* ymm0 = l op r */ \
177-
__ASM_EMIT(OP "ps 0x20(%[" R "], %[off]), %%ymm1, %%ymm1") \
178-
__ASM_EMIT(OP "ps 0x40(%[" R "], %[off]), %%ymm2, %%ymm2") \
179-
__ASM_EMIT(OP "ps 0x60(%[" R "], %[off]), %%ymm3, %%ymm3") \
180-
__ASM_EMIT("vmulps %%ymm6, %%ymm0, %%ymm0") /* ymm0 = (l op r) * 0.5 */ \
181-
__ASM_EMIT("vmulps %%ymm7, %%ymm1, %%ymm1") \
182-
__ASM_EMIT("vmulps %%ymm6, %%ymm2, %%ymm2") \
183-
__ASM_EMIT("vmulps %%ymm7, %%ymm3, %%ymm3") \
184-
__ASM_EMIT("vmovups %%ymm0, 0x00(%[" P "], %[off])") \
185-
__ASM_EMIT("vmovups %%ymm1, 0x20(%[" P "], %[off])") \
186-
__ASM_EMIT("vmovups %%ymm2, 0x40(%[" P "], %[off])") \
187-
__ASM_EMIT("vmovups %%ymm3, 0x60(%[" P "], %[off])") \
188-
__ASM_EMIT("sub $32, %[count]") \
189-
__ASM_EMIT("add $0x80, %[off]") \
190-
__ASM_EMIT("4:") \
191-
/* 16x block */ \
192-
__ASM_EMIT("add $16, %[count]") \
193-
__ASM_EMIT("jl 6f") \
194-
__ASM_EMIT("vmovups 0x00(%[" L "], %[off]), %%ymm0") /* ymm0 = l */ \
195-
__ASM_EMIT("vmovups 0x20(%[" L "], %[off]), %%ymm1") \
196-
__ASM_EMIT(OP "ps 0x00(%[" R "], %[off]), %%ymm0, %%ymm0") /* ymm0 = l op r */ \
197-
__ASM_EMIT(OP "ps 0x20(%[" R "], %[off]), %%ymm1, %%ymm1") \
198-
__ASM_EMIT("vmulps %%ymm6, %%ymm0, %%ymm0") /* ymm0 = (l op r) * 0.5 */ \
199-
__ASM_EMIT("vmulps %%ymm7, %%ymm1, %%ymm1") \
200-
__ASM_EMIT("vmovups %%ymm0, 0x00(%[" P "], %[off])") \
201-
__ASM_EMIT("vmovups %%ymm1, 0x20(%[" P "], %[off])") \
202-
__ASM_EMIT("sub $16, %[count]") \
203-
__ASM_EMIT("add $0x40, %[off]") \
204-
__ASM_EMIT("6:") \
205-
/* 8x block */ \
206-
__ASM_EMIT("add $8, %[count]") \
207-
__ASM_EMIT("jl 8f") \
208-
__ASM_EMIT("vmovups 0x00(%[" L "], %[off]), %%ymm0") /* ymm0 = l */ \
209-
__ASM_EMIT(OP "ps 0x00(%[" R "], %[off]), %%ymm0, %%ymm0") /* ymm0 = l op r */ \
210-
__ASM_EMIT("vmulps %%ymm6, %%ymm0, %%ymm0") /* ymm0 = (l op r) * 0.5 */ \
211-
__ASM_EMIT("vmovups %%ymm0, 0x00(%[" P "], %[off])") \
212-
__ASM_EMIT("sub $8, %[count]") \
213-
__ASM_EMIT("add $0x20, %[off]") \
214-
__ASM_EMIT("8:") \
215-
/* 4x block */ \
216-
__ASM_EMIT("add $4, %[count]") \
217-
__ASM_EMIT("jl 10f") \
218-
__ASM_EMIT("vmovups 0x00(%[" L "], %[off]), %%xmm0") /* xmm0 = l */ \
219-
__ASM_EMIT(OP "ps 0x00(%[" R "], %[off]), %%xmm0, %%xmm0") /* xmm0 = l op r */ \
220-
__ASM_EMIT("vmulps %%xmm6, %%xmm0, %%xmm0") /* xmm0 = (l op r) * 0.5 */ \
221-
__ASM_EMIT("vmovups %%xmm0, 0x00(%[" P "], %[off])") \
222-
__ASM_EMIT("sub $4, %[count]") \
223-
__ASM_EMIT("add $0x10, %[off]") \
224-
__ASM_EMIT("10:") \
225-
/* 1x blocks */ \
226-
__ASM_EMIT("add $3, %[count]") \
227-
__ASM_EMIT("jl 12f") \
228-
__ASM_EMIT("11:") \
229-
__ASM_EMIT("vmovss 0x00(%[" L "], %[off]), %%xmm0") /* xmm0 = l */ \
230-
__ASM_EMIT(OP "ss 0x00(%[" R "], %[off]), %%xmm0, %%xmm0") /* xmm0 = l op r */ \
231-
__ASM_EMIT("vmulss %%xmm6, %%xmm0, %%xmm0") /* xmm0 = (l op r) * 0.5 */ \
232-
__ASM_EMIT("vmovss %%xmm0, 0x00(%[" P "], %[off])") \
233-
__ASM_EMIT("add $0x04, %[off]") \
234-
__ASM_EMIT("dec %[count]") \
235-
__ASM_EMIT("jge 11b") \
236-
__ASM_EMIT("12:")
141+
#define LR_TO_PART(P, L, R, OP) \
142+
__ASM_EMIT("vmovaps %[X_HALF], %%zmm7") \
143+
__ASM_EMIT("xor %[off], %[off]") \
144+
__ASM_EMIT("vmovaps %%zmm7, %%zmm6") \
145+
/* 64x blocks */ \
146+
__ASM_EMIT("sub $64, %[count]") \
147+
__ASM_EMIT("jb 2f") \
148+
__ASM_EMIT("1:") \
149+
__ASM_EMIT("vmovups 0x00(%[" L "], %[off]), %%zmm0") /* zmm0 = l */ \
150+
__ASM_EMIT("vmovups 0x40(%[" L "], %[off]), %%zmm1") \
151+
__ASM_EMIT("vmovups 0x80(%[" L "], %[off]), %%zmm2") \
152+
__ASM_EMIT("vmovups 0xc0(%[" L "], %[off]), %%zmm3") \
153+
__ASM_EMIT(OP "ps 0x00(%[" R "], %[off]), %%zmm0, %%zmm0") /* zmm0 = l op r */ \
154+
__ASM_EMIT(OP "ps 0x40(%[" R "], %[off]), %%zmm1, %%zmm1") \
155+
__ASM_EMIT(OP "ps 0x80(%[" R "], %[off]), %%zmm2, %%zmm2") \
156+
__ASM_EMIT(OP "ps 0xc0(%[" R "], %[off]), %%zmm3, %%zmm3") \
157+
__ASM_EMIT("vmulps %%zmm6, %%zmm0, %%zmm0") /* zmm0 = (l op r) * 0.5 */ \
158+
__ASM_EMIT("vmulps %%zmm7, %%zmm1, %%zmm1") \
159+
__ASM_EMIT("vmulps %%zmm6, %%zmm2, %%zmm2") \
160+
__ASM_EMIT("vmulps %%zmm7, %%zmm3, %%zmm3") \
161+
__ASM_EMIT("vmovups %%zmm0, 0x00(%[" P "], %[off])") \
162+
__ASM_EMIT("vmovups %%zmm1, 0x40(%[" P "], %[off])") \
163+
__ASM_EMIT("vmovups %%zmm2, 0x80(%[" P "], %[off])") \
164+
__ASM_EMIT("vmovups %%zmm3, 0xc0(%[" P "], %[off])") \
165+
__ASM_EMIT("add $0x100, %[off]") \
166+
__ASM_EMIT("sub $64, %[count]") \
167+
__ASM_EMIT("jae 1b") \
168+
__ASM_EMIT("2:") \
169+
/* 32x block */ \
170+
__ASM_EMIT("add $32, %[count]") \
171+
__ASM_EMIT("jl 4f") \
172+
__ASM_EMIT("vmovups 0x00(%[" L "], %[off]), %%ymm0") /* ymm0 = l */ \
173+
__ASM_EMIT("vmovups 0x20(%[" L "], %[off]), %%ymm1") \
174+
__ASM_EMIT("vmovups 0x40(%[" L "], %[off]), %%ymm2") \
175+
__ASM_EMIT("vmovups 0x60(%[" L "], %[off]), %%ymm3") \
176+
__ASM_EMIT(OP "ps 0x00(%[" R "], %[off]), %%ymm0, %%ymm0") /* ymm0 = l op r */ \
177+
__ASM_EMIT(OP "ps 0x20(%[" R "], %[off]), %%ymm1, %%ymm1") \
178+
__ASM_EMIT(OP "ps 0x40(%[" R "], %[off]), %%ymm2, %%ymm2") \
179+
__ASM_EMIT(OP "ps 0x60(%[" R "], %[off]), %%ymm3, %%ymm3") \
180+
__ASM_EMIT("vmulps %%ymm6, %%ymm0, %%ymm0") /* ymm0 = (l op r) * 0.5 */ \
181+
__ASM_EMIT("vmulps %%ymm7, %%ymm1, %%ymm1") \
182+
__ASM_EMIT("vmulps %%ymm6, %%ymm2, %%ymm2") \
183+
__ASM_EMIT("vmulps %%ymm7, %%ymm3, %%ymm3") \
184+
__ASM_EMIT("vmovups %%ymm0, 0x00(%[" P "], %[off])") \
185+
__ASM_EMIT("vmovups %%ymm1, 0x20(%[" P "], %[off])") \
186+
__ASM_EMIT("vmovups %%ymm2, 0x40(%[" P "], %[off])") \
187+
__ASM_EMIT("vmovups %%ymm3, 0x60(%[" P "], %[off])") \
188+
__ASM_EMIT("sub $32, %[count]") \
189+
__ASM_EMIT("add $0x80, %[off]") \
190+
__ASM_EMIT("4:") \
191+
/* 16x block */ \
192+
__ASM_EMIT("add $16, %[count]") \
193+
__ASM_EMIT("jl 6f") \
194+
__ASM_EMIT("vmovups 0x00(%[" L "], %[off]), %%ymm0") /* ymm0 = l */ \
195+
__ASM_EMIT("vmovups 0x20(%[" L "], %[off]), %%ymm1") \
196+
__ASM_EMIT(OP "ps 0x00(%[" R "], %[off]), %%ymm0, %%ymm0") /* ymm0 = l op r */ \
197+
__ASM_EMIT(OP "ps 0x20(%[" R "], %[off]), %%ymm1, %%ymm1") \
198+
__ASM_EMIT("vmulps %%ymm6, %%ymm0, %%ymm0") /* ymm0 = (l op r) * 0.5 */ \
199+
__ASM_EMIT("vmulps %%ymm7, %%ymm1, %%ymm1") \
200+
__ASM_EMIT("vmovups %%ymm0, 0x00(%[" P "], %[off])") \
201+
__ASM_EMIT("vmovups %%ymm1, 0x20(%[" P "], %[off])") \
202+
__ASM_EMIT("sub $16, %[count]") \
203+
__ASM_EMIT("add $0x40, %[off]") \
204+
__ASM_EMIT("6:") \
205+
/* 8x block */ \
206+
__ASM_EMIT("add $8, %[count]") \
207+
__ASM_EMIT("jl 8f") \
208+
__ASM_EMIT("vmovups 0x00(%[" L "], %[off]), %%ymm0") /* ymm0 = l */ \
209+
__ASM_EMIT(OP "ps 0x00(%[" R "], %[off]), %%ymm0, %%ymm0") /* ymm0 = l op r */ \
210+
__ASM_EMIT("vmulps %%ymm6, %%ymm0, %%ymm0") /* ymm0 = (l op r) * 0.5 */ \
211+
__ASM_EMIT("vmovups %%ymm0, 0x00(%[" P "], %[off])") \
212+
__ASM_EMIT("sub $8, %[count]") \
213+
__ASM_EMIT("add $0x20, %[off]") \
214+
__ASM_EMIT("8:") \
215+
/* 4x block */ \
216+
__ASM_EMIT("add $4, %[count]") \
217+
__ASM_EMIT("jl 10f") \
218+
__ASM_EMIT("vmovups 0x00(%[" L "], %[off]), %%xmm0") /* xmm0 = l */ \
219+
__ASM_EMIT(OP "ps 0x00(%[" R "], %[off]), %%xmm0, %%xmm0") /* xmm0 = l op r */ \
220+
__ASM_EMIT("vmulps %%xmm6, %%xmm0, %%xmm0") /* xmm0 = (l op r) * 0.5 */ \
221+
__ASM_EMIT("vmovups %%xmm0, 0x00(%[" P "], %[off])") \
222+
__ASM_EMIT("sub $4, %[count]") \
223+
__ASM_EMIT("add $0x10, %[off]") \
224+
__ASM_EMIT("10:") \
225+
/* 1x blocks */ \
226+
__ASM_EMIT("add $3, %[count]") \
227+
__ASM_EMIT("jl 12f") \
228+
__ASM_EMIT("11:") \
229+
__ASM_EMIT("vmovss 0x00(%[" L "], %[off]), %%xmm0") /* xmm0 = l */ \
230+
__ASM_EMIT(OP "ss 0x00(%[" R "], %[off]), %%xmm0, %%xmm0") /* xmm0 = l op r */ \
231+
__ASM_EMIT("vmulss %%xmm6, %%xmm0, %%xmm0") /* xmm0 = (l op r) * 0.5 */ \
232+
__ASM_EMIT("vmovss %%xmm0, 0x00(%[" P "], %[off])") \
233+
__ASM_EMIT("add $0x04, %[off]") \
234+
__ASM_EMIT("dec %[count]") \
235+
__ASM_EMIT("jge 11b") \
236+
__ASM_EMIT("12:")
237237

238238
void lr_to_mid(float *m, const float *l, const float *r, size_t count)
239239
{
@@ -265,7 +265,7 @@ namespace lsp
265265
);
266266
}
267267

268-
#undef LR_TO_PART
268+
#undef LR_TO_PART
269269

270270
void ms_to_lr(float *l, float *r, const float *m, const float *s, size_t count)
271271
{

0 commit comments

Comments
 (0)