@@ -101,213 +101,60 @@ void audiomixer_mixer_reset_buffer(audiomixer_mixer_obj_t* self,
101
101
}
102
102
}
103
103
104
- uint32_t add8signed (uint32_t a , uint32_t b ) {
105
- #if (defined (__ARM_ARCH_7EM__ ) && (__ARM_ARCH_7EM__ == 1 )) //Cortex-M4 w/FPU
106
- return __SHADD8 (a , b );
107
- #else
108
- uint32_t result = 0 ;
109
- for (int8_t i = 0 ; i < 4 ; i ++ ) {
110
- int8_t ai = a >> (sizeof (int8_t ) * 8 * i );
111
- int8_t bi = b >> (sizeof (int8_t ) * 8 * i );
112
- int32_t intermediate = (int32_t ) ai + bi / 2 ;
113
- if (intermediate > CHAR_MAX ) {
114
- intermediate = CHAR_MAX ;
115
- } else if (intermediate < CHAR_MIN ) {
116
- intermediate = CHAR_MIN ;
117
- }
118
- result |= ((uint32_t ) intermediate & 0xff ) << (sizeof (int8_t ) * 8 * i );
119
- }
120
- return result ;
121
- #endif
104
// Adds the two packed words a and b with the __QADD16 intrinsic and returns
// the result. CMSIS documents that intrinsic as a signed add of each 16-bit
// half that saturates instead of wrapping.
// The implementation removed by this change used the halving add __SHADD16.
+ __attribute__((always_inline ))
105
+ static inline uint32_t add16signed (uint32_t a , uint32_t b ) {
106
+ return __QADD16 (a , b );
122
107
}
123
108
124
- uint32_t add8unsigned (uint32_t a , uint32_t b ) {
125
- #if (defined (__ARM_ARCH_7EM__ ) && (__ARM_ARCH_7EM__ == 1 )) //Cortex-M4 w/FPU
126
- return __UHADD8 (a , b );
127
- #else
128
- uint32_t result = 0 ;
129
- for (int8_t i = 0 ; i < 4 ; i ++ ) {
130
- uint8_t ai = (a >> (sizeof (uint8_t ) * 8 * i ));
131
- uint8_t bi = (b >> (sizeof (uint8_t ) * 8 * i ));
132
- int32_t intermediate = (int32_t ) (ai + bi ) / 2 ;
133
- if (intermediate > UCHAR_MAX ) {
134
- intermediate = UCHAR_MAX ;
135
- }
136
- result |= ((uint32_t ) intermediate & 0xff ) << (sizeof (uint8_t ) * 8 * i );
137
- }
138
- return result ;
139
- #endif
109
// Scales both signed 16-bit halves of val by the gain mul and returns them
// packed back into one word. Each half is computed as (half * mul) >> 15 and
// then saturated to 16 bits, so mul behaves as a fixed-point factor with 15
// fractional bits.
// Implemented with Arm inline assembly (smulwb/smulwt, ssat, pkhbt).
// NOTE(review): the left shift below overflows a signed 32-bit value when mul
// is 1 << 15 or more; callers presumably keep the level below that -- confirm.
+ __attribute__((always_inline ))
110
+ static inline uint32_t mult16signed (uint32_t val , int32_t mul ) {
111
+ mul <<= 16 ; // gain goes in the upper half so that smulwX's own >> 16 leaves half * mul
112
+ int32_t hi , lo ;
113
+ enum { bits = 16 }; // saturate to 16 bits
114
+ enum { shift = 15 }; // ssat shifts right by this amount (asr) before saturating
115
+ asm volatile ("smulwb %0, %1, %2" : "=r" (lo ) : "r" (mul ), "r" (val )); // product with the bottom half of val
116
+ asm volatile ("smulwt %0, %1, %2" : "=r" (hi ) : "r" (mul ), "r" (val )); // product with the top half of val
117
+ asm volatile ("ssat %0, %1, %2, asr %3" : "=r" (lo ) : "I" (bits ), "r" (lo ), "I" (shift ));
118
+ asm volatile ("ssat %0, %1, %2, asr %3" : "=r" (hi ) : "I" (bits ), "r" (hi ), "I" (shift ));
119
+ asm volatile ("pkhbt %0, %1, %2, lsl #16" : "=r" (val ) : "r" (lo ), "r" (hi )); // pack: lo in bits 0-15, hi in bits 16-31
120
+ return val ;
140
121
}
141
122
142
- uint32_t add16signed (uint32_t a , uint32_t b ) {
143
- #if (defined (__ARM_ARCH_7EM__ ) && (__ARM_ARCH_7EM__ == 1 )) //Cortex-M4 w/FPU
144
- return __SHADD16 (a , b );
145
- #else
146
- uint32_t result = 0 ;
147
- for (int8_t i = 0 ; i < 2 ; i ++ ) {
148
- int16_t ai = a >> (sizeof (int16_t ) * 8 * i );
149
- int16_t bi = b >> (sizeof (int16_t ) * 8 * i );
150
- int32_t intermediate = (int32_t ) ai + bi / 2 ;
151
- if (intermediate > SHRT_MAX ) {
152
- intermediate = SHRT_MAX ;
153
- } else if (intermediate < SHRT_MIN ) {
154
- intermediate = SHRT_MIN ;
155
- }
156
- result |= (((uint32_t ) intermediate ) & 0xffff ) << (sizeof (int16_t ) * 8 * i );
157
- }
158
- return result ;
159
- #endif
123
// Adds 0x80 to each of the four bytes of val through the __UADD8 intrinsic
// (documented by CMSIS as a per-byte add that wraps), which flips the top bit
// of every byte. audiomixer_mixer_get_buffer applies it to the mixed buffer
// when the output samples are 8-bit and not signed.
+ static inline uint32_t tounsigned8 (uint32_t val ) {
124
+ return __UADD8 (val , 0x80808080 );
160
125
}
161
126
162
- uint32_t add16unsigned (uint32_t a , uint32_t b ) {
163
- #if (defined (__ARM_ARCH_7EM__ ) && (__ARM_ARCH_7EM__ == 1 )) //Cortex-M4 w/FPU
164
- return __UHADD16 (a , b );
165
- #else
166
- uint32_t result = 0 ;
167
- for (int8_t i = 0 ; i < 2 ; i ++ ) {
168
- int16_t ai = (a >> (sizeof (uint16_t ) * 8 * i )) - 0x8000 ;
169
- int16_t bi = (b >> (sizeof (uint16_t ) * 8 * i )) - 0x8000 ;
170
- int32_t intermediate = (int32_t ) ai + bi / 2 ;
171
- if (intermediate > USHRT_MAX ) {
172
- intermediate = USHRT_MAX ;
173
- }
174
- result |= ((uint32_t ) intermediate & 0xffff ) << (sizeof (int16_t ) * 8 * i );
175
- }
176
- return result ;
177
- #endif
127
// Adds 0x8000 to each 16-bit half of val through the __UADD16 intrinsic
// (documented by CMSIS as a per-halfword add that wraps), which flips the top
// bit of each half. audiomixer_mixer_get_buffer applies it to the mixed buffer
// when the output samples are 16-bit and not signed.
+ static inline uint32_t tounsigned16 (uint32_t val ) {
128
+ return __UADD16 (val , 0x80008000 );
178
129
}
179
130
180
- static inline uint32_t mult8unsigned (uint32_t val , int32_t mul ) {
181
- // if mul == 0, no need in wasting cycles
182
- if (mul == 0 ) {
183
- return 0 ;
184
- }
185
- /* TODO: workout ARMv7 instructions
186
- #if (defined (__ARM_ARCH_7EM__) && (__ARM_ARCH_7EM__ == 1)) //Cortex-M4 w/FPU
187
- return val;
188
- #else*/
189
- uint32_t result = 0 ;
190
- float mod_mul = (float ) mul / (float ) ((1 <<15 )- 1 );
191
- for (int8_t i = 0 ; i < 4 ; i ++ ) {
192
- uint8_t ai = val >> (sizeof (uint8_t ) * 8 * i );
193
- int32_t intermediate = ai * mod_mul ;
194
- if (intermediate > SHRT_MAX ) {
195
- intermediate = SHRT_MAX ;
196
- }
197
- result |= ((uint32_t ) intermediate & 0xff ) << (sizeof (uint8_t ) * 8 * i );
198
- }
199
-
200
- return result ;
201
- //#endif
202
- }
203
-
204
- static inline uint32_t mult8signed (uint32_t val , int32_t mul ) {
205
- // if mul == 0, no need in wasting cycles
206
- if (mul == 0 ) {
207
- return 0 ;
208
- }
209
- /* TODO: workout ARMv7 instructions
210
- #if (defined (__ARM_ARCH_7EM__) && (__ARM_ARCH_7EM__ == 1)) //Cortex-M4 w/FPU
211
- return val;
212
- #else
213
- */
214
- uint32_t result = 0 ;
215
- float mod_mul = (float )mul / (float )((1 <<15 )- 1 );
216
- for (int8_t i = 0 ; i < 4 ; i ++ ) {
217
- int16_t ai = val >> (sizeof (int8_t ) * 8 * i );
218
- int32_t intermediate = ai * mod_mul ;
219
- if (intermediate > CHAR_MAX ) {
220
- intermediate = CHAR_MAX ;
221
- } else if (intermediate < CHAR_MIN ) {
222
- intermediate = CHAR_MIN ;
223
- }
224
- result |= (((uint32_t ) intermediate ) & 0xff ) << (sizeof (int16_t ) * 8 * i );
225
- }
226
- return result ;
227
- //#endif
131
// Performs the same operation as tounsigned16: 0x8000 is added to each 16-bit
// half of val with wrap-around. Applying that offset twice restores the input,
// so the one operation converts in either direction. mix_one_voice calls this
// on samples that are not signed before scaling them with mult16signed.
+ static inline uint32_t tosigned16 (uint32_t val ) {
132
+ return __UADD16 (val , 0x80008000 );
228
133
}
229
134
230
- //TODO:
231
- static inline uint32_t mult16unsigned (uint32_t val , int32_t mul ) {
232
- // if mul == 0, no need in wasting cycles
233
- if (mul == 0 ) {
234
- return 0 ;
235
- }
236
- /* TODO: the below ARMv7m instructions "work", but the amplitude is much higher/louder
237
- #if (defined (__ARM_ARCH_7EM__) && (__ARM_ARCH_7EM__ == 1)) //Cortex-M4 w/FPU
238
- // there is no unsigned equivalent to the 'SMULWx' ARMv7 Thumb function,
239
- // so we have to do it by hand.
240
- uint32_t lo = val & 0xffff;
241
- uint32_t hi = val >> 16;
242
- //mp_printf(&mp_plat_print, "pre-asm: (mul: %d)\n\tval: %x\tlo: %x\thi: %x\n", mul, val, lo, hi);
243
- uint32_t val_lo;
244
- asm volatile("mul %0, %1, %2" : "=r" (val_lo) : "r" (mul), "r" (lo));
245
- asm volatile("mla %0, %1, %2, %3" : "=r" (val) : "r" (mul), "r" (hi), "r" (val_lo));
246
- //mp_printf(&mp_plat_print, "post-asm:\n\tval: %x\tlo: %x\n\n", val, val_lo);
247
- return val;
248
- #else
249
- */
250
- uint32_t result = 0 ;
251
- float mod_mul = (float )mul / (float )((1 <<15 )- 1 );
252
- for (int8_t i = 0 ; i < 2 ; i ++ ) {
253
- int16_t ai = (val >> (sizeof (uint16_t ) * 8 * i )) - 0x8000 ;
254
- int32_t intermediate = ai * mod_mul ;
255
- if (intermediate > SHRT_MAX ) {
256
- intermediate = SHRT_MAX ;
257
- } else if (intermediate < SHRT_MIN ) {
258
- intermediate = SHRT_MIN ;
259
- }
260
- result |= (((uint32_t ) intermediate ) + 0x8000 ) << (sizeof (int16_t ) * 8 * i );
261
- }
262
- return result ;
263
- //#endif
135
// Spreads the two 8-bit samples held in val into the upper byte of each
// 16-bit half of the returned word: bits 8-15 of val land in bits 24-31 and
// bits 0-7 land in bits 8-15. The low byte of each half is left zero.
// This lets 8-bit audio pass through the 16-bit helpers; pack8 reverses it.
static inline uint32_t unpack8(uint16_t val) {
    // Widen first: a uint16_t operand is promoted to signed int, and shifting
    // a value such as 0xff00 left by 16 is not representable in int.
    uint32_t wide = val;
    return ((wide & 0xff00u) << 16) | ((wide & 0x00ffu) << 8);
}
265
138
266
- static inline uint32_t mult16signed (uint32_t val , int32_t mul ) {
267
- // if mul == 0, no need in wasting cycles
268
- if (mul == 0 ) {
269
- return 0 ;
270
- }
271
- #if (defined (__ARM_ARCH_7EM__ ) && (__ARM_ARCH_7EM__ == 1 )) //Cortex-M4 w/FPU
272
- int32_t hi , lo ;
273
- enum { bits = 16 }; // saturate to 16 bits
274
- enum { shift = 0 }; // shift is done automatically
275
- asm volatile ("smulwb %0, %1, %2" : "=r" (lo ) : "r" (mul ), "r" (val ));
276
- asm volatile ("smulwt %0, %1, %2" : "=r" (hi ) : "r" (mul ), "r" (val ));
277
- asm volatile ("ssat %0, %1, %2, asr %3" : "=r" (lo ) : "I" (bits ), "r" (lo ), "I" (shift ));
278
- asm volatile ("ssat %0, %1, %2, asr %3" : "=r" (hi ) : "I" (bits ), "r" (hi ), "I" (shift ));
279
- asm volatile ("pkhbt %0, %1, %2, lsl #16" : "=r" (val ) : "r" (lo ), "r" (hi )); // pack
280
- return val ;
281
- #else
282
- uint32_t result = 0 ;
283
- float mod_mul = (float )mul / (float )((1 <<15 )- 1 );
284
- for (int8_t i = 0 ; i < 2 ; i ++ ) {
285
- int16_t ai = val >> (sizeof (int16_t ) * 8 * i );
286
- int32_t intermediate = ai * mod_mul ;
287
- if (intermediate > SHRT_MAX ) {
288
- intermediate = SHRT_MAX ;
289
- } else if (intermediate < SHRT_MIN ) {
290
- intermediate = SHRT_MIN ;
291
- }
292
- result |= (((uint32_t ) intermediate ) & 0xffff ) << (sizeof (int16_t ) * 8 * i );
293
- }
294
- return result ;
295
- #endif
139
// Reverses unpack8: keeps only the upper byte of each 16-bit half of val
// (bits 24-31 and bits 8-15) and returns them side by side in bits 8-15 and
// bits 0-7. The low byte of each half is discarded, and bits 16-31 of the
// result are always zero.
+ static inline uint32_t pack8 (uint32_t val ) {
140
+ return ((val & 0xff000000 ) >> 16 ) | ((val & 0xff00 ) >> 8 );
296
141
}
297
142
143
// Branch hints built on __builtin_expect: LIKELY marks a condition as expected
// to be true, UNLIKELY as expected to be false. The !! normalises the
// condition to 0 or 1 before it is compared with the expected value.
+ #define LIKELY (x ) (__builtin_expect(!!(x), 1))
144
+ #define UNLIKELY (x ) (__builtin_expect(!!(x), 0))
298
145
static void mix_one_voice (audiomixer_mixer_obj_t * self ,
299
146
audiomixer_mixervoice_obj_t * voice , bool voices_active ,
300
147
uint32_t * word_buffer , uint32_t length ) {
301
- uint32_t j = 0 ;
302
148
bool voice_done = voice -> sample == NULL ;
303
- for ( uint32_t i = 0 ; i < length ; i ++ ) {
304
- if (! voice_done && j >= voice -> buffer_length ) {
149
+ while (! voice_done && length ! = 0 ) {
150
+ if (voice -> buffer_length == 0 ) {
305
151
if (!voice -> more_data ) {
306
152
if (voice -> loop ) {
307
153
audiosample_reset_buffer (voice -> sample , false, 0 );
308
154
} else {
309
155
voice -> sample = NULL ;
310
156
voice_done = true;
157
+ break ;
311
158
}
312
159
}
313
160
if (!voice_done ) {
@@ -316,64 +163,81 @@ static void mix_one_voice(audiomixer_mixer_obj_t* self,
316
163
// Track length in terms of words.
317
164
voice -> buffer_length /= sizeof (uint32_t );
318
165
voice -> more_data = result == GET_BUFFER_MORE_DATA ;
319
- j = 0 ;
320
166
}
321
167
}
168
+
169
+ uint32_t n = MIN (voice -> buffer_length , length );
170
+ uint32_t * src = voice -> remaining_buffer ;
171
+ uint16_t level = voice -> level ;
172
+
322
173
// First active voice gets copied over verbatim.
323
- uint32_t sample_value ;
324
- if (voice_done ) {
325
- // Exit early if another voice already set all samples once.
326
- if (voices_active ) {
327
- continue ;
328
- }
329
- sample_value = 0 ;
330
- if (!self -> samples_signed ) {
331
- if (self -> bits_per_sample == 8 ) {
332
- sample_value = 0x7f7f7f7f ;
174
+ if (!voices_active ) {
175
+ if (LIKELY (self -> bits_per_sample == 16 )) {
176
+ if (LIKELY (self -> samples_signed )) {
177
+ for (uint32_t i = 0 ; i < n ; i ++ ) {
178
+ uint32_t v = src [i ];
179
+ word_buffer [i ] = mult16signed (v , level );
180
+ }
333
181
} else {
334
- sample_value = 0x7fff7fff ;
182
+ for (uint32_t i = 0 ; i < n ; i ++ ) {
183
+ uint32_t v = src [i ];
184
+ v = tosigned16 (v );
185
+ word_buffer [i ] = mult16signed (v , level );
186
+ }
335
187
}
336
- }
337
- } else {
338
- sample_value = voice -> remaining_buffer [j ];
339
- }
340
-
341
- // apply the mixer level
342
- if (!self -> samples_signed ) {
343
- if (self -> bits_per_sample == 8 ) {
344
- sample_value = mult8unsigned (sample_value , voice -> level );
345
- } else {
346
- sample_value = mult16unsigned (sample_value , voice -> level );
347
- }
348
- } else {
349
- if (self -> bits_per_sample == 8 ) {
350
- sample_value = mult8signed (sample_value , voice -> level );
351
188
} else {
352
- sample_value = mult16signed (sample_value , voice -> level );
189
+ uint16_t * hword_buffer = (uint16_t * )word_buffer ;
190
+ uint16_t * hsrc = (uint16_t * )src ;
191
+ for (uint32_t i = 0 ; i < n * 2 ; i ++ ) {
192
+ uint32_t word = unpack8 (hsrc [i ]);
193
+ if (LIKELY (!self -> samples_signed )) {
194
+ word = tosigned16 (word );
195
+ }
196
+ word = mult16signed (word , level );
197
+ hword_buffer [i ] = pack8 (word );
198
+ }
353
199
}
354
- }
355
-
356
- if (!voices_active ) {
357
- word_buffer [i ] = sample_value ;
358
200
} else {
359
- if (self -> bits_per_sample == 8 ) {
360
- if (self -> samples_signed ) {
361
- word_buffer [i ] = add8signed (word_buffer [i ], sample_value );
201
+ if (LIKELY (self -> bits_per_sample == 16 )) {
202
+ if (LIKELY (self -> samples_signed )) {
203
+ for (uint32_t i = 0 ; i < n ; i ++ ) {
204
+ uint32_t word = src [i ];
205
+ word_buffer [i ] = add16signed (mult16signed (word , level ), word_buffer [i ]);
206
+ }
362
207
} else {
363
- word_buffer [i ] = add8unsigned (word_buffer [i ], sample_value );
208
+ for (uint32_t i = 0 ; i < n ; i ++ ) {
209
+ uint32_t word = src [i ];
210
+ word = tosigned16 (word );
211
+ word_buffer [i ] = add16signed (mult16signed (word , level ), word_buffer [i ]);
212
+ }
364
213
}
365
214
} else {
366
- if (self -> samples_signed ) {
367
- word_buffer [i ] = add16signed (word_buffer [i ], sample_value );
368
- } else {
369
- word_buffer [i ] = add16unsigned (word_buffer [i ], sample_value );
215
+ uint16_t * hword_buffer = (uint16_t * )word_buffer ;
216
+ uint16_t * hsrc = (uint16_t * )src ;
217
+ for (uint32_t i = 0 ; i < n * 2 ; i ++ ) {
218
+ uint32_t word = unpack8 (hsrc [i ]);
219
+ if (LIKELY (!self -> samples_signed )) {
220
+ word = tosigned16 (word );
221
+ }
222
+ word = mult16signed (word , level );
223
+ word = add16signed (word , unpack8 (hword_buffer [i ]));
224
+ hword_buffer [i ] = pack8 (word );
370
225
}
371
226
}
372
227
}
373
- j ++ ;
228
+ length -= n ;
229
+ word_buffer += n ;
230
+ voice -> remaining_buffer += n ;
231
+ voice -> buffer_length -= n ;
232
+ }
233
+
234
+ if (length && !voices_active ) {
235
+ uint32_t sample_value = self -> bits_per_sample == 8
236
+ ? 0x80808080 : 0x80008000 ;
237
+ for (uint32_t i = 0 ; i < length ; i ++ ) {
238
+ word_buffer [i ] = sample_value ;
239
+ }
374
240
}
375
- voice -> buffer_length -= j ;
376
- voice -> remaining_buffer += j ;
377
241
}
378
242
379
243
audioio_get_buffer_result_t audiomixer_mixer_get_buffer (audiomixer_mixer_obj_t * self ,
@@ -403,13 +267,27 @@ audioio_get_buffer_result_t audiomixer_mixer_get_buffer(audiomixer_mixer_obj_t*
403
267
}
404
268
self -> use_first_buffer = !self -> use_first_buffer ;
405
269
bool voices_active = false;
270
+ uint32_t length = self -> len / sizeof (uint32_t );
271
+
406
272
for (int32_t v = 0 ; v < self -> voice_count ; v ++ ) {
407
273
audiomixer_mixervoice_obj_t * voice = MP_OBJ_TO_PTR (self -> voice [v ]);
408
274
409
- mix_one_voice (self , voice , voices_active , word_buffer , self -> len / sizeof ( uint32_t ) );
275
+ mix_one_voice (self , voice , voices_active , word_buffer , length );
410
276
voices_active = true;
411
277
}
412
278
279
+ if (!self -> samples_signed ) {
280
+ if (self -> bits_per_sample == 16 ) {
281
+ for (uint32_t i = 0 ; i < length ; i ++ ) {
282
+ word_buffer [i ] = tounsigned16 (word_buffer [i ]);
283
+ }
284
+ } else {
285
+ for (uint32_t i = 0 ; i < length ; i ++ ) {
286
+ word_buffer [i ] = tounsigned8 (word_buffer [i ]);
287
+ }
288
+ }
289
+ }
290
+
413
291
self -> read_count += 1 ;
414
292
} else if (!self -> use_first_buffer ) {
415
293
* buffer = (uint8_t * ) self -> first_buffer ;
0 commit comments