Skip to content

Commit cf05bb1

Browse files
committed
Merge pull request godotengine#106602 from DeeJayLSP/faster-qoa-enc
Update `qoa.h` to latest git
2 parents 3120d17 + bc19259 commit cf05bb1

File tree

2 files changed

+28
-28
lines changed

2 files changed

+28
-28
lines changed

thirdparty/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -792,7 +792,7 @@ Collection of single-file libraries used in Godot components.
792792
- `polypartition-0002-shadow-warning.patch` (GH-66808)
793793
- `qoa.{c,h}`
794794
* Upstream: https://github.com/phoboslab/qoa
795-
* Version: git (a2d927f8ce78a85e903676a33e0f956e53b89f7d, 2024)
795+
* Version: git (ae07b57deb98127a5b40916cb57775823d7437d2, 2025)
796796
* License: MIT
797797
* Modifications: Added implementation through `qoa.c`.
798798
- `r128.{c,h}`

thirdparty/misc/qoa.h

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -31,20 +31,20 @@ struct {
3131
struct {
3232
char magic[4]; // magic bytes "qoaf"
3333
uint32_t samples; // samples per channel in this file
34-
} file_header;
34+
} file_header;
3535
3636
struct {
3737
struct {
3838
uint8_t num_channels; // no. of channels
3939
uint24_t samplerate; // samplerate in hz
4040
uint16_t fsamples; // samples per channel in this frame
4141
uint16_t fsize; // frame size (includes this header)
42-
} frame_header;
42+
} frame_header;
4343
4444
struct {
4545
int16_t history[4]; // most recent last
4646
int16_t weights[4]; // most recent last
47-
} lms_state[num_channels];
47+
} lms_state[num_channels];
4848
4949
qoa_slice_t slices[256][num_channels];
5050
@@ -66,15 +66,15 @@ frame may contain between 1 .. 256 (inclusive) slices per channel. The last
6666
slice (for each channel) in the last frame may contain less than 20 samples; the
6767
slice still must be 8 bytes wide, with the unused samples zeroed out.
6868
69-
Channels are interleaved per slice. E.g. for 2 channel stereo:
69+
Channels are interleaved per slice. E.g. for 2 channel stereo:
7070
slice[0] = L, slice[1] = R, slice[2] = L, slice[3] = R ...
7171
7272
A valid QOA file or stream must have at least one frame. Each frame must contain
7373
at least one channel and one sample with a samplerate between 1 .. 16777215
7474
(inclusive).
7575
7676
If the total number of samples is not known by the encoder, the samples in the
77-
file header may be set to 0x00000000 to indicate that the encoder is
77+
file header may be set to 0x00000000 to indicate that the encoder is
7878
"streaming". In a streaming context, the samplerate and number of channels may
7979
differ from frame to frame. For static files (those with samples set to a
8080
non-zero value), each frame must have the same number of channels and same
@@ -88,15 +88,15 @@ counts 1 .. 8 is:
8888
8989
1. Mono
9090
2. L, R
91-
3. L, R, C
92-
4. FL, FR, B/SL, B/SR
93-
5. FL, FR, C, B/SL, B/SR
91+
3. L, R, C
92+
4. FL, FR, B/SL, B/SR
93+
5. FL, FR, C, B/SL, B/SR
9494
6. FL, FR, C, LFE, B/SL, B/SR
95-
7. FL, FR, C, LFE, B, SL, SR
95+
7. FL, FR, C, LFE, B, SL, SR
9696
8. FL, FR, C, LFE, BL, BR, SL, SR
9797
9898
QOA predicts each audio sample based on the previously decoded ones using a
99-
"Sign-Sign Least Mean Squares Filter" (LMS). This prediction plus the
99+
"Sign-Sign Least Mean Squares Filter" (LMS). This prediction plus the
100100
dequantized residual forms the final output sample.
101101
102102
*/
@@ -178,9 +178,9 @@ typedef unsigned long long qoa_uint64_t;
178178

179179

180180
/* The quant_tab provides an index into the dequant_tab for residuals in the
181-
range of -8 .. 8. It maps this range to just 3bits and becomes less accurate at
182-
the higher end. Note that the residual zero is identical to the lowest positive
183-
value. This is mostly fine, since the qoa_div() function always rounds away
181+
range of -8 .. 8. It maps this range to just 3bits and becomes less accurate at
182+
the higher end. Note that the residual zero is identical to the lowest positive
183+
value. This is mostly fine, since the qoa_div() function always rounds away
184184
from zero. */
185185

186186
static const int qoa_quant_tab[17] = {
@@ -193,8 +193,8 @@ static const int qoa_quant_tab[17] = {
193193
/* We have 16 different scalefactors. Like the quantized residuals these become
194194
less accurate at the higher end. In theory, the highest scalefactor that we
195195
would need to encode the highest 16bit residual is (2**16)/8 = 8192. However we
196-
rely on the LMS filter to predict samples accurately enough that a maximum
197-
residual of one quarter of the 16 bit range is sufficient. I.e. with the
196+
rely on the LMS filter to predict samples accurately enough that a maximum
197+
residual of one quarter of the 16 bit range is sufficient. I.e. with the
198198
scalefactor 2048 times the quant range of 8 we can encode residuals up to 2**14.
199199
200200
The scalefactor values are computed as:
@@ -205,9 +205,9 @@ static const int qoa_scalefactor_tab[16] = {
205205
};
206206

207207

208-
/* The reciprocal_tab maps each of the 16 scalefactors to their rounded
209-
reciprocals 1/scalefactor. This allows us to calculate the scaled residuals in
210-
the encoder with just one multiplication instead of an expensive division. We
208+
/* The reciprocal_tab maps each of the 16 scalefactors to their rounded
209+
reciprocals 1/scalefactor. This allows us to calculate the scaled residuals in
210+
the encoder with just one multiplication instead of an expensive division. We
211211
do this in .16 fixed point with integers, instead of floats.
212212
213213
The reciprocal_tab is computed as:
@@ -218,11 +218,11 @@ static const int qoa_reciprocal_tab[16] = {
218218
};
219219

220220

221-
/* The dequant_tab maps each of the scalefactors and quantized residuals to
221+
/* The dequant_tab maps each of the scalefactors and quantized residuals to
222222
their unscaled & dequantized version.
223223
224224
Since qoa_div() rounds away from zero, the smallest entries are mapped to 3/4
225-
instead of 1. The dequant_tab assumes the following dequantized values for each
225+
instead of 1. The dequant_tab assumes the following dequantized values for each
226226
of the quant_tab indices and is computed as:
227227
float dqt[8] = {0.75, -0.75, 2.5, -2.5, 4.5, -4.5, 7, -7};
228228
dequant_tab[s][q] <- round_ties_away_from_zero(scalefactor_tab[s] * dqt[q])
@@ -258,7 +258,7 @@ adjusting 4 weights based on the residual of the previous prediction.
258258
The next sample is predicted as the sum of (weight[i] * history[i]).
259259
260260
The adjustment of the weights is done with a "Sign-Sign-LMS" that adds or
261-
subtracts the residual to each weight, based on the corresponding sample from
261+
subtracts the residual to each weight, based on the corresponding sample from
262262
the history. This, surprisingly, is sufficient to get worthwhile predictions.
263263
264264
This is all done with fixed point integers. Hence the right-shifts when updating
@@ -285,8 +285,8 @@ static void qoa_lms_update(qoa_lms_t *lms, int sample, int residual) {
285285
}
286286

287287

288-
/* qoa_div() implements a rounding division, but avoids rounding to zero for
289-
small numbers. E.g. 0.1 will be rounded to 1. Note that 0 itself still
288+
/* qoa_div() implements a rounding division, but avoids rounding to zero for
289+
small numbers. E.g. 0.1 will be rounded to 1. Note that 0 itself still
290290
returns as 0, which is handled in the qoa_quant_tab[].
291291
qoa_div() takes an index into the .16 fixed point qoa_reciprocal_tab as an
292292
argument, so it can do the division with a cheaper integer multiplication. */
@@ -385,10 +385,10 @@ unsigned int qoa_encode_frame(const short *sample_data, qoa_desc *qoa, unsigned
385385
for (unsigned int c = 0; c < channels; c++) {
386386
int slice_len = qoa_clamp(QOA_SLICE_LEN, 0, frame_len - sample_index);
387387
int slice_start = sample_index * channels + c;
388-
int slice_end = (sample_index + slice_len) * channels + c;
388+
int slice_end = (sample_index + slice_len) * channels + c;
389389

390-
/* Brute for search for the best scalefactor. Just go through all
391-
16 scalefactors, encode all samples for the current slice and
390+
/* Brute force search for the best scalefactor. Just go through all
391+
16 scalefactors, encode all samples for the current slice and
392392
measure the total squared error. */
393393
qoa_uint64_t best_rank = -1;
394394
#ifdef QOA_RECORD_TOTAL_ERROR
@@ -402,7 +402,7 @@ unsigned int qoa_encode_frame(const short *sample_data, qoa_desc *qoa, unsigned
402402
/* There is a strong correlation between the scalefactors of
403403
neighboring slices. As an optimization, start testing
404404
the best scalefactor of the previous slice first. */
405-
int scalefactor = (sfi + prev_scalefactor[c]) % 16;
405+
int scalefactor = (sfi + prev_scalefactor[c]) & (16 - 1);
406406

407407
/* We have to reset the LMS state to the last known good one
408408
before trying each scalefactor, as each pass updates the LMS

0 commit comments

Comments
 (0)