Skip to content

Commit d48e614

Browse files
committed
PanasonicV8Decompressor: faster bitreversing for v4eU8 (-5%)
https://godbolt.org/z/Gfn6ebG39
https://alive2.llvm.org/ce/z/DRqbFV

```
Comparing /home/lebedevri/rawspeed/build-old/src/utilities/rsbench/rsbench to /home/lebedevri/rawspeed/build-new/src/utilities/rsbench/rsbench
Benchmark                                                                  Time             CPU      Time Old      Time New       CPU Old       CPU New
---------------------------------------------------------------------------------------------------------------------------------------------------------------
P1126458_mechanical.RW2/threads:1/process_time/real_time_pvalue          0.0000          0.0000      U Test, Repetitions: 99 vs 99
P1126458_mechanical.RW2/threads:1/process_time/real_time_mean           -0.0448         -0.0449           151           145           151           145
P1126458_mechanical.RW2/threads:1/process_time/real_time_median         -0.0446         -0.0445           151           145           151           145
P1126458_mechanical.RW2/threads:1/process_time/real_time_stddev         -0.6307         -0.6902             0             0             0             0
P1126458_mechanical.RW2/threads:1/process_time/real_time_cv             -0.6133         -0.6756             0             0             0             0
P1126458_mechanical.RW2/threads:32/process_time/real_time_pvalue         0.0000          0.0000      U Test, Repetitions: 99 vs 99
P1126458_mechanical.RW2/threads:32/process_time/real_time_mean          -0.0331         -0.0331            77            75           155           150
P1126458_mechanical.RW2/threads:32/process_time/real_time_median        -0.0349         -0.0349            78            75           155           150
P1126458_mechanical.RW2/threads:32/process_time/real_time_stddev        +0.1374         +0.1165             0             0             0             0
P1126458_mechanical.RW2/threads:32/process_time/real_time_cv            +0.1763         +0.1548             0             0             0             0
OVERALL_GEOMEAN                                                         -0.0390         -0.0390             0             0             0             0
```
1 parent d165368 commit d48e614

File tree

2 files changed

+32
-2
lines changed

2 files changed

+32
-2
lines changed

src/librawspeed/adt/Bit.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,12 @@
2424
#include "adt/Casts.h"
2525
#include "adt/Invariant.h"
2626
#include <algorithm>
27+
#include <array>
2728
#include <bit>
2829
#include <climits>
2930
#include <concepts>
3031
#include <cstdint>
32+
#include <cstring>
3133
#include <type_traits>
3234

3335
namespace rawspeed {
@@ -146,4 +148,28 @@ T bitreverse(const T v) {
146148
return uint8_t((uint8_t(v) * 0x0202020202ULL & 0x010884422010ULL) % 1023);
147149
}
148150

151+
// 32-bit overload of bitreverse(). It is only declared when the compiler
// reports that the __builtin_bitreverse32 intrinsic is available; otherwise
// no uint32_t overload is provided by this block.
#if __has_builtin(__builtin_bitreverse32)
152+
// Returns `v` with the order of its 32 bits reversed, by forwarding directly
// to the compiler intrinsic. Constrained to exactly uint32_t.
template <class T>
153+
requires std::same_as<T, uint32_t>
154+
T bitreverse(const T v) {
155+
return __builtin_bitreverse32(v);
156+
}
157+
#endif
158+
159+
// Reverses the bit order inside each of the four bytes of `x`, keeping the
// bytes themselves in their original positions, and returns the result.
// `x` is taken by value, so the caller's array is not modified.
// Constrained to arrays of exactly uint8_t.
template <class T>
160+
requires std::same_as<T, uint8_t>
161+
std::array<T, 4> bitreverse_each(std::array<T, 4> x) {
162+
#if !__has_builtin(__builtin_bitreverse32)
163+
// Fallback when the 32-bit intrinsic is unavailable: bit-reverse the
// elements one at a time via the single-element bitreverse().
for (T& e : x)
164+
e = bitreverse(e);
165+
#else
166+
// Fast path: process all four bytes at once as a single 32-bit word.
uint32_t tmp;
167+
// Copy the four bytes into the word (no pointer cast involved).
std::memcpy(&tmp, x.data(), sizeof(uint32_t));
168+
// Reversing all 32 bits reverses the bits within every byte, but it also
// reverses the order of the bytes within the word...
tmp = bitreverse(tmp);
169+
// ...so swap the bytes back, leaving each byte bit-reversed in its
// original position.
tmp = __builtin_bswap32(tmp);
170+
// Copy the word back over the array's storage.
std::memcpy(x.data(), &tmp, sizeof(uint32_t));
171+
#endif
172+
return x;
173+
}
174+
149175
} // namespace rawspeed

src/librawspeed/decompressors/PanasonicV8Decompressor.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,12 @@ struct BitStreamerReversedSequentialReplenisher
8989
std::copy_n(currInput.begin(), BitStreamerTraits<Tag>::MaxProcessBytes,
9090
tmp.begin());
9191

92-
for (std::byte& b : tmp)
93-
b = std::byte{bitreverse(uint8_t(b))};
92+
std::array<uint8_t, 4> ints;
93+
for (int i = 0; i != 4; ++i)
94+
ints[i] = uint8_t(tmp(i));
95+
ints = bitreverse_each(ints);
96+
for (int i = 0; i != 4; ++i)
97+
tmp(i) = std::byte{ints[i]};
9498

9599
return tmpStorage;
96100
}

0 commit comments

Comments
 (0)