Skip to content

Commit 54c19d8

Browse files
committed
PanasonicV8Decompressor: faster decoding (-5%)
```
Comparing /home/lebedevri/rawspeed/build-old/src/utilities/rsbench/rsbench to /home/lebedevri/rawspeed/build-new/src/utilities/rsbench/rsbench
Benchmark                                                                                 Time       CPU  Time Old  Time New   CPU Old   CPU New
------------------------------------------------------------------------------------------------------------------------------------------------
Panasonic/DC-S5M2/P1126458_mechanical.RW2/threads:1/process_time/real_time_pvalue       0.0000    0.0000      U Test, Repetitions: 99 vs 99
Panasonic/DC-S5M2/P1126458_mechanical.RW2/threads:1/process_time/real_time_mean        -0.0355   -0.0354       215       208       215       208
Panasonic/DC-S5M2/P1126458_mechanical.RW2/threads:1/process_time/real_time_median      -0.0374   -0.0372       215       207       215       207
Panasonic/DC-S5M2/P1126458_mechanical.RW2/threads:1/process_time/real_time_stddev      +9.8172  +10.8040         0         1         0         1
Panasonic/DC-S5M2/P1126458_mechanical.RW2/threads:1/process_time/real_time_cv         +10.2150  +11.2375         0         0         0         0
Panasonic/DC-S5M2/P1126458_mechanical.RW2/threads:32/process_time/real_time_pvalue      0.0000    0.0000      U Test, Repetitions: 99 vs 99
Panasonic/DC-S5M2/P1126458_mechanical.RW2/threads:32/process_time/real_time_mean       -0.0485   -0.0485       118       112       235       224
Panasonic/DC-S5M2/P1126458_mechanical.RW2/threads:32/process_time/real_time_median     -0.0485   -0.0485       118       112       235       224
Panasonic/DC-S5M2/P1126458_mechanical.RW2/threads:32/process_time/real_time_stddev     +0.3820   +0.4222         0         0         0         0
Panasonic/DC-S5M2/P1126458_mechanical.RW2/threads:32/process_time/real_time_cv         +0.4524   +0.4947         0         0         0         0
OVERALL_GEOMEAN                                                                        -0.0420   -0.0420         0         0         0         0
```
1 parent f9ad8a8 commit 54c19d8

File tree

1 file changed

+33
-12
lines changed

1 file changed

+33
-12
lines changed

src/librawspeed/decompressors/PanasonicV8Decompressor.cpp

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "adt/Casts.h"
3030
#include "adt/CroppedArray2DRef.h"
3131
#include "adt/Invariant.h"
32+
#include "adt/Optional.h"
3233
#include "adt/Point.h"
3334
#include "adt/TiledArray2DRef.h"
3435
#include "bitstreams/BitStream.h"
@@ -202,22 +203,37 @@ void isValidImageGrid(iRectangle2D imgDim,
202203
ThrowRDE("Tiles do not cover whole output image");
203204
}
204205

205-
int minBitsPerPixelNeeded(
206-
Array1DRef<const PanasonicV8Decompressor::DecoderLUTEntry> mDecoderLUT) {
206+
template <typename T>
207+
int bitsPerPixelNeeded(
208+
Array1DRef<const PanasonicV8Decompressor::DecoderLUTEntry> mDecoderLUT,
209+
T cb) {
207210
invariant(mDecoderLUT.size() > 0);
208211
const auto r = std::accumulate(
209-
mDecoderLUT.begin(), mDecoderLUT.end(), std::numeric_limits<int>::max(),
210-
[](int init, const PanasonicV8Decompressor::DecoderLUTEntry& e) {
212+
mDecoderLUT.begin(), mDecoderLUT.end(), Optional<int>(),
213+
[cb](auto init, const PanasonicV8Decompressor::DecoderLUTEntry& e) {
211214
if (e.isSentinel())
212215
return init;
213216
invariant(e.bitcount > 0);
214217
const auto total = e.bitcount + e.diffCat;
215218
invariant(total > 0);
216-
return std::min(init, total);
219+
init = init.has_value() ? cb(*init, total) : total;
220+
return init;
217221
});
218-
invariant(r > 0);
219-
invariant(r <= (16 + 17));
220-
return r;
222+
const auto bit = *r;
223+
invariant(bit > 0);
224+
return bit;
225+
}
226+
227+
int minBitsPerPixelNeeded(
228+
Array1DRef<const PanasonicV8Decompressor::DecoderLUTEntry> mDecoderLUT) {
229+
return bitsPerPixelNeeded(mDecoderLUT,
230+
[](auto a, auto b) { return std::min(a, b); });
231+
}
232+
233+
int maxBitsPerPixelNeeded(
234+
Array1DRef<const PanasonicV8Decompressor::DecoderLUTEntry> mDecoderLUT) {
235+
return bitsPerPixelNeeded(mDecoderLUT,
236+
[](auto a, auto b) { return std::max(a, b); });
221237
}
222238

223239
} // namespace
@@ -327,6 +343,10 @@ PanasonicV8Decompressor::PanasonicV8Decompressor(RawImage outputImg,
327343
}
328344
if (!mRawOutput->dim.hasPositiveArea())
329345
ThrowRDE("Unexpected image dimensions");
346+
const auto maxBpp = maxBitsPerPixelNeeded(mParams.mDecoderLUT);
347+
if (maxBpp > 32) {
348+
ThrowRDE("Single pixel decode may consume more than 32 bits");
349+
}
330350
const auto minBpp = minBitsPerPixelNeeded(mParams.mDecoderLUT);
331351
for (int stripIdx = 0; stripIdx < mParams.mStrips.size(); ++stripIdx) {
332352
const auto strip = mParams.mStrips(stripIdx);
@@ -427,19 +447,20 @@ void PanasonicV8Decompressor::decompressStrip(const Array2DRef<uint16_t> out,
427447
int32_t inline PanasonicV8Decompressor::InternalDecoder::decodeNextDiffValue() {
428448
// Retrieve the difference category, which indicates magnitude of the
429449
// difference between the predicted and actual value.
430-
const auto next16 = uint16_t(mBitPump.peekBits(16));
450+
mBitPump.fill(32);
451+
const auto next16 = uint16_t(mBitPump.peekBitsNoFill(16));
431452
invariant(mLUT.size() == 1 + UINT16_MAX);
432453
const auto& [codeLen, codeValue] = mLUT(next16);
433454
if (codeValue == 0 && codeLen == 7)
434455
ThrowRDE("Decoding encountered an invalid value!");
435-
mBitPump.skipBits(
436-
codeLen); // Skip the bits that encoded the difference category
456+
// Skip the bits that encoded the difference category
457+
mBitPump.skipBitsNoFill(codeLen);
437458
int diffLen = codeValue;
438459

439460
if (diffLen == 0)
440461
return 0;
441462

442-
const uint32_t diff = mBitPump.getBits(diffLen);
463+
const uint32_t diff = mBitPump.getBitsNoFill(diffLen);
443464
return AbstractPrefixCodeDecoder<BaselineCodeTag>::extend(diff, diffLen);
444465
}
445466

0 commit comments

Comments
 (0)