Skip to content

Commit 2094770

Browse files
author
kazuho
committed
separate impl. of binary search
add mitsunari-san's optimized code using SSE git-svn-id: http://svn.coderepos.org/share/lang/cplusplus/range_coder@7123 d0d07461-0603-4401-acd4-de1884942a52
1 parent 98496f4 commit 2094770

File tree

1 file changed

+54
-11
lines changed

1 file changed

+54
-11
lines changed

range_coder.hpp

Lines changed: 54 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
#ifndef __RANGE_CODER_HPP__
22
#define __RANGE_CODER_HPP__
33

4+
#ifdef RANGE_CODER_USE_SSE
5+
#include <xmmintrin.h>
6+
#endif
7+
48
// original work by Daisuke Okanohara 2006/06/16
59

610
struct rc_type_t {
@@ -85,29 +89,68 @@ template <class Iter> class rc_encoder_t : public rc_type_t {
8589
uint counter;
8690
};
8791

88-
template <class Iterator, unsigned N> class rc_decoder_t : public rc_type_t {
92+
// Compile-time description of a cumulative-frequency table: the element type
// of the table (freq_type) and the number of symbols it covers (N).
// Search implementations derive from this, and rc_decoder_t reads both
// members through its SearchType parameter.
//
// The symbol-count parameter is spelled NumSymbols because identifiers that
// begin with an underscore followed by an uppercase letter are reserved for
// the implementation.
template <typename FreqType, unsigned NumSymbols> struct rc_decoder_search_traits_t : public rc_type_t {
  typedef FreqType freq_type;
  enum {
    N = NumSymbols
  };
};
98+
99+
// Generic symbol lookup: binary search over a cumulative-frequency table.
//
// get_index(freq, pos) returns the smallest index i in [0, NumSymbols) for
// which pos < freq[i+1] (entries are converted to uint before comparing), or
// NumSymbols if there is no such index. Entries freq[1] .. freq[NumSymbols]
// may be read, so the table needs at least NumSymbols + 1 elements.
// Assumes freq is non-decreasing (required for the bisection to be
// meaningful) -- TODO confirm against the table builder.
//
// The symbol-count parameter is spelled NumSymbols because identifiers that
// begin with an underscore followed by an uppercase letter are reserved for
// the implementation.
template <typename FreqType, unsigned NumSymbols> struct rc_decoder_search_t : public rc_decoder_search_traits_t<FreqType, NumSymbols> {
  // NOTE(review): `uint` is not declared in this block. As an unqualified
  // name inside a template it is not looked up in the dependent base class,
  // so a declaration of `uint` must be visible at this point -- confirm on
  // every target toolchain.
  static uint get_index(const FreqType *freq, uint pos) {
    uint left = 0;
    uint right = NumSymbols;
    while (left < right) {
      // left + (right - left) / 2 cannot wrap, unlike (left + right) / 2.
      const uint mid = left + (right - left) / 2;
      if (static_cast<uint>(freq[mid + 1]) <= pos)
        left = mid + 1;   // upper bound of `mid` is still <= pos: go right
      else
        right = mid;      // `mid` is a candidate: keep it in range
    }
    return left;
  }
};
113+
114+
#ifdef RANGE_CODER_USE_SSE
115+
116+
// SSE2 symbol lookup for a 256-symbol table of 16-bit cumulative frequencies.
//
// get_index(freq, pos) scans freq[0] .. freq[255], sixteen entries per
// iteration, for the first entry that is greater than pos, and returns the
// index just before that entry. If none of those 256 entries is greater than
// pos, the answer is the last symbol, N - 1: its upper bound freq[256] is not
// part of the scan. The previous code fell out of the loop with mask == 0 in
// that case and evaluated __builtin_ctz(0), whose result is undefined.
//
// Preconditions visible from the code:
//  - freq is accessed through __m128i pointers, so it must be 16-byte aligned.
//  - pos is narrowed to 16 bits by _mm_set1_epi16 and compared as a signed
//    value (_mm_cmplt_epi16).
//  - NOTE(review): if freq[0] itself compares greater than pos the result is
//    0 - 1, i.e. it wraps; presumably freq[0] is always 0 -- confirm.
//  - NOTE(review): these are SSE2 intrinsics (<emmintrin.h>), while the file
//    includes <xmmintrin.h>; confirm that header provides them on every
//    supported toolchain.
template<> struct rc_decoder_search_t<short, 256> : public rc_decoder_search_traits_t<short, 256> {
  static uint get_index(const freq_type *freq, uint pos) {
    const __m128i needle = _mm_set1_epi16(pos);
    for (unsigned i = 0; i < N; i += 16) {
      const __m128i lo = *reinterpret_cast<const __m128i*>(freq + i);
      const __m128i hi = *reinterpret_cast<const __m128i*>(freq + i + 8);
      // Each 16-bit lane becomes all-ones where pos < freq[lane].
      const __m128i lo_gt = _mm_cmplt_epi16(needle, lo);
      const __m128i hi_gt = _mm_cmplt_epi16(needle, hi);
      // Two mask bits per lane; do the shift on unsigned to stay clear of
      // the sign bit.
      const unsigned mask =
          (static_cast<unsigned>(_mm_movemask_epi8(hi_gt)) << 16) |
          static_cast<unsigned>(_mm_movemask_epi8(lo_gt));
      if (mask) {
        // ctz / 2 is the lane of the first greater entry; step back one
        // to get the symbol whose range contains pos.
        return i + (__builtin_ctz(mask) >> 1) - 1;
      }
    }
    // No scanned entry exceeds pos: pos lies in the last symbol's range.
    return N - 1;
  }
};
133+
134+
#endif
135+
136+
template <class Iterator, class SearchType> class rc_decoder_t : public rc_type_t {
89137
public:
138+
typedef SearchType search_type;
139+
typedef typename search_type::freq_type freq_type;
140+
static const unsigned N = search_type::N;
90141
rc_decoder_t(const Iterator& _i, const Iterator _e) : iter(_i), iter_end(_e) {
91142
R = 0xFFFFFFFF;
92143
D = 0;
93144
for (int i = 0; i < 4; i++) {
94145
D = (D << 8) | next();
95146
}
96147
}
97-
uint decode(const uint total, const uint* cumFreq) {
148+
uint decode(const uint total, const freq_type* cumFreq) {
98149
const uint r = R / total;
99150
const uint targetPos = std::min(total-1, D / r);
100151

101152
//find target s.t. cumFreq[target] <= targetPos < cumFreq[target+1]
102-
uint left = 0;
103-
uint right = N;
104-
while(left < right) {
105-
uint mid = (left+right)/2;
106-
if (cumFreq[mid+1] <= targetPos) left = mid+1;
107-
else right = mid;
108-
}
109-
110-
const uint target = left;
153+
const uint target = search_type::get_index(cumFreq, targetPos);
111154
const uint low = cumFreq[target];
112155
const uint high = cumFreq[target+1];
113156

0 commit comments

Comments
 (0)