@@ -138,102 +138,102 @@ namespace lsp
138
138
);
139
139
}
140
140
141
- #define LR_TO_PART (P, L, R, OP ) /* Emits asm that stores (L[i] OP R[i]) * X_HALF into P[i] for `count` single-precision elements. P, L and R are spliced in as asm operand names; OP is a mnemonic prefix that gets the "ps"/"ss" suffix appended. The asm also references operands named off, count and X_HALF. */ \
142
- __ASM_EMIT (" xor %[off ], %[off] " ) /* off = 0: byte offset shared by all three buffers */ \
143
- __ASM_EMIT ( " vmovaps %[X_HALF ], %%zmm7 " ) /* zmm7 = multiplier loaded from the X_HALF operand */ \
144
- /* 64x blocks: loop while at least 64 elements remain; off advances by 0x100 bytes per pass. NOTE(review): the loads/stores below use 32-byte ymm registers at 0x40-byte slot spacing, so bytes 0x20..0x3f of every slot look unprocessed; the '+' side of this hunk uses zmm instead - confirm. */ \
145
- __ASM_EMIT ( " sub $64, %[count] " ) /* bias count by -64; flags are consumed by the jb below */ \
146
- __ASM_EMIT ( " vmovaps %%zmm7 , %%zmm6 " ) /* zmm6 = copy of the multiplier (sits between sub and jb) */ \
147
- __ASM_EMIT (" jb 2f" ) /* fewer than 64 elements: skip the loop */ \
148
- __ASM_EMIT (" 1:" ) \
149
- __ASM_EMIT (" vmovups 0x00(%[" L " ], %[off]), %%ymm0 " ) /* ymm0 = l */ \
150
- __ASM_EMIT (" vmovups 0x40(%[" L " ], %[off]), %%ymm1 " ) \
151
- __ASM_EMIT (" vmovups 0x80(%[" L " ], %[off]), %%ymm2 " ) \
152
- __ASM_EMIT (" vmovups 0xc0(%[" L " ], %[off]), %%ymm3 " ) \
153
- __ASM_EMIT (OP " ps 0x00(%[" R " ], %[off]), %%ymm0 , %%ymm0 " ) /* ymm0 = l op r */ \
154
- __ASM_EMIT (OP " ps 0x40(%[" R " ], %[off]), %%ymm1 , %%ymm1 " ) \
155
- __ASM_EMIT (OP " ps 0x80(%[" R " ], %[off]), %%ymm2 , %%ymm2 " ) \
156
- __ASM_EMIT (OP " ps 0xc0(%[" R " ], %[off]), %%ymm3 , %%ymm3 " ) \
157
- __ASM_EMIT (" vmulps %%ymm6 , %%ymm0 , %%ymm0 " ) /* ymm0 = (l op r) * 0.5 */ \
158
- __ASM_EMIT (" vmulps %%ymm7 , %%ymm1 , %%ymm1 " ) \
159
- __ASM_EMIT (" vmulps %%ymm6 , %%ymm2 , %%ymm2 " ) \
160
- __ASM_EMIT (" vmulps %%ymm7 , %%ymm3 , %%ymm3 " ) \
161
- __ASM_EMIT (" vmovups %%ymm0 , 0x00(%[" P " ], %[off])" ) \
162
- __ASM_EMIT (" vmovups %%ymm1 , 0x40(%[" P " ], %[off])" ) \
163
- __ASM_EMIT (" vmovups %%ymm2 , 0x80(%[" P " ], %[off])" ) \
164
- __ASM_EMIT (" vmovups %%ymm3 , 0xc0(%[" P " ], %[off])" ) \
165
- __ASM_EMIT (" add $0x100, %[off]" ) \
166
- __ASM_EMIT (" sub $64, %[count]" ) \
167
- __ASM_EMIT (" jae 1b" ) /* repeat while no borrow, i.e. another 64 elements were available */ \
168
- __ASM_EMIT (" 2:" ) \
169
- /* 32x block: runs at most once; count is still biased by -64, so adding 32 is non-negative only when at least 32 elements remain */ \
170
- __ASM_EMIT (" add $32, %[count]" ) \
171
- __ASM_EMIT (" jl 4f" ) \
172
- __ASM_EMIT (" vmovups 0x00(%[" L " ], %[off]), %%ymm0" ) /* ymm0 = l */ \
173
- __ASM_EMIT (" vmovups 0x20(%[" L " ], %[off]), %%ymm1" ) \
174
- __ASM_EMIT (" vmovups 0x40(%[" L " ], %[off]), %%ymm2" ) \
175
- __ASM_EMIT (" vmovups 0x60(%[" L " ], %[off]), %%ymm3" ) \
176
- __ASM_EMIT (OP " ps 0x00(%[" R " ], %[off]), %%ymm0, %%ymm0" ) /* ymm0 = l op r */ \
177
- __ASM_EMIT (OP " ps 0x20(%[" R " ], %[off]), %%ymm1, %%ymm1" ) \
178
- __ASM_EMIT (OP " ps 0x40(%[" R " ], %[off]), %%ymm2, %%ymm2" ) \
179
- __ASM_EMIT (OP " ps 0x60(%[" R " ], %[off]), %%ymm3, %%ymm3" ) \
180
- __ASM_EMIT (" vmulps %%ymm6, %%ymm0, %%ymm0" ) /* ymm0 = (l op r) * 0.5 */ \
181
- __ASM_EMIT (" vmulps %%ymm7, %%ymm1, %%ymm1" ) \
182
- __ASM_EMIT (" vmulps %%ymm6, %%ymm2, %%ymm2" ) \
183
- __ASM_EMIT (" vmulps %%ymm7, %%ymm3, %%ymm3" ) \
184
- __ASM_EMIT (" vmovups %%ymm0, 0x00(%[" P " ], %[off])" ) \
185
- __ASM_EMIT (" vmovups %%ymm1, 0x20(%[" P " ], %[off])" ) \
186
- __ASM_EMIT (" vmovups %%ymm2, 0x40(%[" P " ], %[off])" ) \
187
- __ASM_EMIT (" vmovups %%ymm3, 0x60(%[" P " ], %[off])" ) \
188
- __ASM_EMIT (" sub $32, %[count]" ) /* account for the 32 elements just processed */ \
189
- __ASM_EMIT (" add $0x80, %[off]" ) \
190
- __ASM_EMIT (" 4:" ) \
191
- /* 16x block: same add/jl/sub pattern as the 32x block, two ymm registers, runs at most once */ \
192
- __ASM_EMIT (" add $16, %[count]" ) \
193
- __ASM_EMIT (" jl 6f" ) \
194
- __ASM_EMIT (" vmovups 0x00(%[" L " ], %[off]), %%ymm0" ) /* ymm0 = l */ \
195
- __ASM_EMIT (" vmovups 0x20(%[" L " ], %[off]), %%ymm1" ) \
196
- __ASM_EMIT (OP " ps 0x00(%[" R " ], %[off]), %%ymm0, %%ymm0" ) /* ymm0 = l op r */ \
197
- __ASM_EMIT (OP " ps 0x20(%[" R " ], %[off]), %%ymm1, %%ymm1" ) \
198
- __ASM_EMIT (" vmulps %%ymm6, %%ymm0, %%ymm0" ) /* ymm0 = (l op r) * 0.5 */ \
199
- __ASM_EMIT (" vmulps %%ymm7, %%ymm1, %%ymm1" ) \
200
- __ASM_EMIT (" vmovups %%ymm0, 0x00(%[" P " ], %[off])" ) \
201
- __ASM_EMIT (" vmovups %%ymm1, 0x20(%[" P " ], %[off])" ) \
202
- __ASM_EMIT (" sub $16, %[count]" ) \
203
- __ASM_EMIT (" add $0x40, %[off]" ) \
204
- __ASM_EMIT (" 6:" ) \
205
- /* 8x block: one ymm register, runs at most once */ \
206
- __ASM_EMIT (" add $8, %[count]" ) \
207
- __ASM_EMIT (" jl 8f" ) \
208
- __ASM_EMIT (" vmovups 0x00(%[" L " ], %[off]), %%ymm0" ) /* ymm0 = l */ \
209
- __ASM_EMIT (OP " ps 0x00(%[" R " ], %[off]), %%ymm0, %%ymm0" ) /* ymm0 = l op r */ \
210
- __ASM_EMIT (" vmulps %%ymm6, %%ymm0, %%ymm0" ) /* ymm0 = (l op r) * 0.5 */ \
211
- __ASM_EMIT (" vmovups %%ymm0, 0x00(%[" P " ], %[off])" ) \
212
- __ASM_EMIT (" sub $8, %[count]" ) \
213
- __ASM_EMIT (" add $0x20, %[off]" ) \
214
- __ASM_EMIT (" 8:" ) \
215
- /* 4x block: one xmm register, runs at most once */ \
216
- __ASM_EMIT (" add $4, %[count]" ) \
217
- __ASM_EMIT (" jl 10f" ) \
218
- __ASM_EMIT (" vmovups 0x00(%[" L " ], %[off]), %%xmm0" ) /* xmm0 = l */ \
219
- __ASM_EMIT (OP " ps 0x00(%[" R " ], %[off]), %%xmm0, %%xmm0" ) /* xmm0 = l op r */ \
220
- __ASM_EMIT (" vmulps %%xmm6, %%xmm0, %%xmm0" ) /* xmm0 = (l op r) * 0.5 */ \
221
- __ASM_EMIT (" vmovups %%xmm0, 0x00(%[" P " ], %[off])" ) \
222
- __ASM_EMIT (" sub $4, %[count]" ) \
223
- __ASM_EMIT (" add $0x10, %[off]" ) \
224
- __ASM_EMIT (" 10:" ) \
225
- /* 1x blocks: scalar tail loop; count is biased by -4 here, so adding 3 leaves (remaining - 1) and the loop runs while count >= 0 */ \
226
- __ASM_EMIT (" add $3, %[count]" ) \
227
- __ASM_EMIT (" jl 12f" ) \
228
- __ASM_EMIT (" 11:" ) \
229
- __ASM_EMIT (" vmovss 0x00(%[" L " ], %[off]), %%xmm0" ) /* xmm0 = l */ \
230
- __ASM_EMIT (OP " ss 0x00(%[" R " ], %[off]), %%xmm0, %%xmm0" ) /* xmm0 = l op r */ \
231
- __ASM_EMIT (" vmulss %%xmm6, %%xmm0, %%xmm0" ) /* xmm0 = (l op r) * 0.5 */ \
232
- __ASM_EMIT (" vmovss %%xmm0, 0x00(%[" P " ], %[off])" ) \
233
- __ASM_EMIT (" add $0x04, %[off]" ) \
234
- __ASM_EMIT (" dec %[count]" ) \
235
- __ASM_EMIT (" jge 11b" ) \
236
- __ASM_EMIT (" 12:" ) /* end of the emitted sequence */
141
+ #define LR_TO_PART (P, L, R, OP ) /* Emits asm that stores (L[i] OP R[i]) * X_HALF into P[i] for `count` single-precision elements, in chunks of 64, 32, 16, 8, 4 and finally 1. P, L and R are spliced in as asm operand names; OP is a mnemonic prefix that gets the "ps"/"ss" suffix appended. The asm also references operands named off, count and X_HALF. */ \
142
+ __ASM_EMIT (" vmovaps %[X_HALF ], %%zmm7 " ) /* zmm7 = multiplier loaded from the X_HALF operand */ \
143
+ __ASM_EMIT ( " xor %[off ], %[off] " ) /* off = 0: byte offset shared by all three buffers */ \
144
+ __ASM_EMIT ( " vmovaps %%zmm7, %%zmm6 " ) /* zmm6 = copy of the multiplier; zmm6/zmm7 alternate in the multiplies below */ \
145
+ /* 64x blocks: loop while at least 64 elements remain; four 64-byte zmm registers per pass, off advances by 0x100 bytes */ \
146
+ __ASM_EMIT ( " sub $64 , %[count] " ) /* bias count by -64; a borrow means fewer than 64 elements */ \
147
+ __ASM_EMIT (" jb 2f" ) \
148
+ __ASM_EMIT (" 1:" ) \
149
+ __ASM_EMIT (" vmovups 0x00(%[" L " ], %[off]), %%zmm0 " ) /* zmm0 = l */ \
150
+ __ASM_EMIT (" vmovups 0x40(%[" L " ], %[off]), %%zmm1 " ) \
151
+ __ASM_EMIT (" vmovups 0x80(%[" L " ], %[off]), %%zmm2 " ) \
152
+ __ASM_EMIT (" vmovups 0xc0(%[" L " ], %[off]), %%zmm3 " ) \
153
+ __ASM_EMIT (OP " ps 0x00(%[" R " ], %[off]), %%zmm0 , %%zmm0 " ) /* zmm0 = l op r */ \
154
+ __ASM_EMIT (OP " ps 0x40(%[" R " ], %[off]), %%zmm1 , %%zmm1 " ) \
155
+ __ASM_EMIT (OP " ps 0x80(%[" R " ], %[off]), %%zmm2 , %%zmm2 " ) \
156
+ __ASM_EMIT (OP " ps 0xc0(%[" R " ], %[off]), %%zmm3 , %%zmm3 " ) \
157
+ __ASM_EMIT (" vmulps %%zmm6 , %%zmm0 , %%zmm0 " ) /* zmm0 = (l op r) * 0.5 */ \
158
+ __ASM_EMIT (" vmulps %%zmm7 , %%zmm1 , %%zmm1 " ) \
159
+ __ASM_EMIT (" vmulps %%zmm6 , %%zmm2 , %%zmm2 " ) \
160
+ __ASM_EMIT (" vmulps %%zmm7 , %%zmm3 , %%zmm3 " ) \
161
+ __ASM_EMIT (" vmovups %%zmm0 , 0x00(%[" P " ], %[off])" ) \
162
+ __ASM_EMIT (" vmovups %%zmm1 , 0x40(%[" P " ], %[off])" ) \
163
+ __ASM_EMIT (" vmovups %%zmm2 , 0x80(%[" P " ], %[off])" ) \
164
+ __ASM_EMIT (" vmovups %%zmm3 , 0xc0(%[" P " ], %[off])" ) \
165
+ __ASM_EMIT (" add $0x100, %[off]" ) \
166
+ __ASM_EMIT (" sub $64, %[count]" ) \
167
+ __ASM_EMIT (" jae 1b" ) /* repeat while no borrow, i.e. another 64 elements were available */ \
168
+ __ASM_EMIT (" 2:" ) \
169
+ /* 32x block: runs at most once; count is still biased by -64, so adding 32 is non-negative only when at least 32 elements remain */ \
170
+ __ASM_EMIT (" add $32, %[count]" ) \
171
+ __ASM_EMIT (" jl 4f" ) \
172
+ __ASM_EMIT (" vmovups 0x00(%[" L " ], %[off]), %%ymm0" ) /* ymm0 = l */ \
173
+ __ASM_EMIT (" vmovups 0x20(%[" L " ], %[off]), %%ymm1" ) \
174
+ __ASM_EMIT (" vmovups 0x40(%[" L " ], %[off]), %%ymm2" ) \
175
+ __ASM_EMIT (" vmovups 0x60(%[" L " ], %[off]), %%ymm3" ) \
176
+ __ASM_EMIT (OP " ps 0x00(%[" R " ], %[off]), %%ymm0, %%ymm0" ) /* ymm0 = l op r */ \
177
+ __ASM_EMIT (OP " ps 0x20(%[" R " ], %[off]), %%ymm1, %%ymm1" ) \
178
+ __ASM_EMIT (OP " ps 0x40(%[" R " ], %[off]), %%ymm2, %%ymm2" ) \
179
+ __ASM_EMIT (OP " ps 0x60(%[" R " ], %[off]), %%ymm3, %%ymm3" ) \
180
+ __ASM_EMIT (" vmulps %%ymm6, %%ymm0, %%ymm0" ) /* ymm0 = (l op r) * 0.5 */ \
181
+ __ASM_EMIT (" vmulps %%ymm7, %%ymm1, %%ymm1" ) \
182
+ __ASM_EMIT (" vmulps %%ymm6, %%ymm2, %%ymm2" ) \
183
+ __ASM_EMIT (" vmulps %%ymm7, %%ymm3, %%ymm3" ) \
184
+ __ASM_EMIT (" vmovups %%ymm0, 0x00(%[" P " ], %[off])" ) \
185
+ __ASM_EMIT (" vmovups %%ymm1, 0x20(%[" P " ], %[off])" ) \
186
+ __ASM_EMIT (" vmovups %%ymm2, 0x40(%[" P " ], %[off])" ) \
187
+ __ASM_EMIT (" vmovups %%ymm3, 0x60(%[" P " ], %[off])" ) \
188
+ __ASM_EMIT (" sub $32, %[count]" ) /* account for the 32 elements just processed */ \
189
+ __ASM_EMIT (" add $0x80, %[off]" ) \
190
+ __ASM_EMIT (" 4:" ) \
191
+ /* 16x block: same add/jl/sub pattern as the 32x block, two ymm registers, runs at most once */ \
192
+ __ASM_EMIT (" add $16, %[count]" ) \
193
+ __ASM_EMIT (" jl 6f" ) \
194
+ __ASM_EMIT (" vmovups 0x00(%[" L " ], %[off]), %%ymm0" ) /* ymm0 = l */ \
195
+ __ASM_EMIT (" vmovups 0x20(%[" L " ], %[off]), %%ymm1" ) \
196
+ __ASM_EMIT (OP " ps 0x00(%[" R " ], %[off]), %%ymm0, %%ymm0" ) /* ymm0 = l op r */ \
197
+ __ASM_EMIT (OP " ps 0x20(%[" R " ], %[off]), %%ymm1, %%ymm1" ) \
198
+ __ASM_EMIT (" vmulps %%ymm6, %%ymm0, %%ymm0" ) /* ymm0 = (l op r) * 0.5 */ \
199
+ __ASM_EMIT (" vmulps %%ymm7, %%ymm1, %%ymm1" ) \
200
+ __ASM_EMIT (" vmovups %%ymm0, 0x00(%[" P " ], %[off])" ) \
201
+ __ASM_EMIT (" vmovups %%ymm1, 0x20(%[" P " ], %[off])" ) \
202
+ __ASM_EMIT (" sub $16, %[count]" ) \
203
+ __ASM_EMIT (" add $0x40, %[off]" ) \
204
+ __ASM_EMIT (" 6:" ) \
205
+ /* 8x block: one ymm register, runs at most once */ \
206
+ __ASM_EMIT (" add $8, %[count]" ) \
207
+ __ASM_EMIT (" jl 8f" ) \
208
+ __ASM_EMIT (" vmovups 0x00(%[" L " ], %[off]), %%ymm0" ) /* ymm0 = l */ \
209
+ __ASM_EMIT (OP " ps 0x00(%[" R " ], %[off]), %%ymm0, %%ymm0" ) /* ymm0 = l op r */ \
210
+ __ASM_EMIT (" vmulps %%ymm6, %%ymm0, %%ymm0" ) /* ymm0 = (l op r) * 0.5 */ \
211
+ __ASM_EMIT (" vmovups %%ymm0, 0x00(%[" P " ], %[off])" ) \
212
+ __ASM_EMIT (" sub $8, %[count]" ) \
213
+ __ASM_EMIT (" add $0x20, %[off]" ) \
214
+ __ASM_EMIT (" 8:" ) \
215
+ /* 4x block: one xmm register, runs at most once */ \
216
+ __ASM_EMIT (" add $4, %[count]" ) \
217
+ __ASM_EMIT (" jl 10f" ) \
218
+ __ASM_EMIT (" vmovups 0x00(%[" L " ], %[off]), %%xmm0" ) /* xmm0 = l */ \
219
+ __ASM_EMIT (OP " ps 0x00(%[" R " ], %[off]), %%xmm0, %%xmm0" ) /* xmm0 = l op r */ \
220
+ __ASM_EMIT (" vmulps %%xmm6, %%xmm0, %%xmm0" ) /* xmm0 = (l op r) * 0.5 */ \
221
+ __ASM_EMIT (" vmovups %%xmm0, 0x00(%[" P " ], %[off])" ) \
222
+ __ASM_EMIT (" sub $4, %[count]" ) \
223
+ __ASM_EMIT (" add $0x10, %[off]" ) \
224
+ __ASM_EMIT (" 10:" ) \
225
+ /* 1x blocks: scalar tail loop; count is biased by -4 here, so adding 3 leaves (remaining - 1) and the loop runs while count >= 0 */ \
226
+ __ASM_EMIT (" add $3, %[count]" ) \
227
+ __ASM_EMIT (" jl 12f" ) \
228
+ __ASM_EMIT (" 11:" ) \
229
+ __ASM_EMIT (" vmovss 0x00(%[" L " ], %[off]), %%xmm0" ) /* xmm0 = l */ \
230
+ __ASM_EMIT (OP " ss 0x00(%[" R " ], %[off]), %%xmm0, %%xmm0" ) /* xmm0 = l op r */ \
231
+ __ASM_EMIT (" vmulss %%xmm6, %%xmm0, %%xmm0" ) /* xmm0 = (l op r) * 0.5 */ \
232
+ __ASM_EMIT (" vmovss %%xmm0, 0x00(%[" P " ], %[off])" ) \
233
+ __ASM_EMIT (" add $0x04, %[off]" ) \
234
+ __ASM_EMIT (" dec %[count]" ) \
235
+ __ASM_EMIT (" jge 11b" ) \
236
+ __ASM_EMIT (" 12:" ) /* end of the emitted sequence */
237
237
238
238
void lr_to_mid (float *m, const float *l, const float *r, size_t count)
239
239
{
@@ -265,7 +265,7 @@ namespace lsp
265
265
);
266
266
}
267
267
268
- #undef LR_TO_PART
268
+ #undef LR_TO_PART
269
269
270
270
void ms_to_lr (float *l, float *r, const float *m, const float *s, size_t count)
271
271
{
0 commit comments