Skip to content

Commit 16d09fd

Browse files
author
kazuho
committed
add test code and sample table builder script
support setting a non-zero base for freq. table git-svn-id: http://svn.coderepos.org/share/lang/cplusplus/range_coder@7156 d0d07461-0603-4401-acd4-de1884942a52
1 parent 2094770 commit 16d09fd

File tree

3 files changed

+140
-15
lines changed

3 files changed

+140
-15
lines changed

bench.cpp

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
extern "C" {
2+
#include <assert.h>
3+
#include <limits.h>
4+
#include <stdio.h>
5+
#include <stdlib.h>
6+
#include <string.h>
7+
}
8+
#include <algorithm>
9+
#include "range_coder.hpp"
10+
11+
#include "table.c"
12+
13+
/**
 * Bounds-checked character sink exposing just enough of the output-iterator
 * surface (`*it = c`, `++it`, `it++`) to be handed to rc_encoder_t.
 *
 * The object does not own a write position of its own: it advances a
 * caller-owned `char*` through a pointer to it, so the caller can read how
 * many bytes were produced from its own pointer, and every copy of the
 * writer shares that position.
 */
class writer_t {
public:
  /** Thrown when a write is attempted while the cursor sits at the limit. */
  struct overrun_t {};

  /**
   * @param _p    address of the caller's write cursor (advanced on each write)
   * @param _max  one-past-the-end of the writable area
   */
  writer_t(char **_p, char *_max) : cursor_(_p), limit_(_max) {}

  /**
   * Stores one byte at the cursor and advances the cursor by one.
   * @throws overrun_t if the cursor has already reached the limit; nothing
   *         is written and the cursor is left unchanged in that case.
   */
  writer_t &operator=(char c) {
    char *&out = *cursor_;
    if (out == limit_)
      throw overrun_t();
    *out = c;
    ++out;
    return *this;
  }

  /* Dereference and both increments are no-ops that return the writer
     itself; the actual advance happens inside operator=(char). */
  writer_t &operator*() { return *this; }
  writer_t &operator++() { return *this; }
  writer_t &operator++(int) { return *this; }

private:
  char **cursor_; /* points at the caller-owned write position */
  char *limit_;   /* one-past-the-end of the output buffer */
};
30+
31+
#define FREQ_BASE SHRT_MIN
32+
#define LOOP_CNT 1024
33+
34+
int main(int argc, char **argv)
35+
{
36+
char buf[1024 * 1024], cbuf[1024 * 1024], rbuf[1024 * 1024];
37+
size_t buflen, cbuflen;
38+
unsigned long long start;
39+
int i;
40+
41+
/* read */
42+
buflen = fread(buf, 1, sizeof(buf) - 1, stdin);
43+
/* compress */
44+
start = rdtsc();
45+
for (i = 0; i < LOOP_CNT; i++) {
46+
char *cbufpt = cbuf;
47+
rc_encoder_t<writer_t> enc(writer_t(&cbufpt, cbuf + sizeof(cbuf)));
48+
for (const char *p = buf, *e = buf + buflen; p != e; p++) {
49+
unsigned ch = (unsigned char)*p;
50+
#ifdef USE_ORDERED_TABLE
51+
ch = to_ordered[ch];
52+
#endif
53+
assert(freq[ch] != freq[ch + 1]);
54+
enc.encode(freq[ch] - FREQ_BASE, freq[ch + 1] - FREQ_BASE,
55+
freq[256] - FREQ_BASE);
56+
}
57+
enc.final();
58+
cbuflen = cbufpt - cbuf;
59+
}
60+
printf("compression: %lu Mticks\n", (long)((rdtsc() - start) / 1024 / 1024));
61+
/* decompress */
62+
start = rdtsc();
63+
for (i = 0; i < LOOP_CNT; i++) {
64+
rc_decoder_t<const char*, rc_decoder_search_t<short, 256, FREQ_BASE> >
65+
dec(cbuf, cbuf + cbuflen);
66+
for (char *p = rbuf, *e = rbuf + buflen; p != e; p++) {
67+
unsigned ch = dec.decode(freq[256] - FREQ_BASE, freq);
68+
#ifdef USE_ORDERED_TABLE
69+
ch = from_ordered[ch];
70+
#endif
71+
*p = ch;
72+
}
73+
}
74+
printf("decompression: %lu Mticks\n",
75+
(long)((rdtsc() - start) / 1024 / 1024));
76+
/* check result */
77+
if (memcmp(buf, rbuf, buflen) != 0) {
78+
fprintf(stderr, "original data and decompressed data does not match.\n");
79+
exit(99);
80+
}
81+
82+
return 0;
83+
}

build_table.pl

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#! /usr/bin/perl

# Builds a cumulative byte-frequency table from the files named on the command
# line (or stdin) and prints it as C source on stdout.
#
# Output:
#   static short freq[257]  cumulative frequencies, each stored minus 0x8000
#   with --ordered, additionally:
#     #define USE_ORDERED_TABLE 1
#     from_ordered[rank] = byte value, most frequent byte first
#     to_ordered[byte]   = rank of that byte

use strict;
use warnings;

use Getopt::Long;
use List::Util qw/sum/;

my ($do_ordered);

GetOptions(
    'ordered' => \$do_ordered,
);

# occurrence count of every byte value
my @cnt = map { 0 } 0..255;

while (<>) {
    foreach my $c (split '', $_) {
        $cnt[ord $c]++;
    }
}

# Total number of bytes seen. Checked before anything is printed so that empty
# input fails cleanly instead of dying later with "Illegal division by zero"
# after partial output.
my $cc = sum @cnt;
die "no input data: cannot build a frequency table\n" unless $cc;

if ($do_ordered) {
    # byte values sorted by descending occurrence count
    my @order = sort { $cnt[$b] <=> $cnt[$a] } 0..255;
    print "#define USE_ORDERED_TABLE 1\n";
    print "static unsigned char from_ordered[] = {", join(',', @order), "};\n";
    # inverse permutation: byte value => rank
    my %r = map { $order[$_] => $_ } 0..255;
    print "static unsigned char to_ordered[] = {", join(',', map { $r{$_} } 0..255), "};\n";
    # from here on the counts are indexed by rank instead of byte value
    @cnt = map { $cnt[$order[$_]] } 0..255;
}

my @freq;
my $acc = 0;
for (my $i = 0; $i < 256; $i++) {
    push @freq, $acc;
    my $width = int(($cnt[$i] / $cc) * 0xfe00);
    # A byte that does occur must keep a non-empty range, otherwise it cannot
    # be encoded. Truncation would give rare bytes a width of zero. At most
    # 256 such bumps are possible, so the total stays <= 0xfe00 + 0x100 and
    # still fits in a short after the 0x8000 offset below.
    $width = 1 if $cnt[$i] > 0 && $width == 0;
    $acc += $width;
}
push @freq, $acc;

# values are shifted down by 0x8000 so that they fit in a signed 16-bit short
print "static short freq[] __attribute__((aligned(16))) = {", join(',', map { $_ - 0x8000 } @freq), "};\n";

range_coder.hpp

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -89,35 +89,34 @@ template <class Iter> class rc_encoder_t : public rc_type_t {
8989
uint counter;
9090
};
9191

92-
template <typename FreqType, unsigned _N> struct rc_decoder_search_traits_t : public rc_type_t {
92+
template <typename FreqType, unsigned _N, int _BASE> struct rc_decoder_search_traits_t : public rc_type_t {
9393
typedef FreqType freq_type;
9494
enum {
95-
N = _N
95+
N = _N,
96+
BASE = _BASE
9697
};
9798
};
9899

99-
template <typename FreqType, unsigned _N> struct rc_decoder_search_t : public rc_decoder_search_traits_t<FreqType, _N> {
100-
static uint get_index(const FreqType *freq, uint pos) {
100+
template <typename FreqType, unsigned _N, int _BASE = 0> struct rc_decoder_search_t : public rc_decoder_search_traits_t<FreqType, _N, _BASE> {
101+
static uint get_index(const FreqType *freq, FreqType pos) {
101102
uint left = 0;
102103
uint right = _N;
103104
while(left < right) {
104105
uint mid = (left+right)/2;
105-
if (static_cast<uint>(freq[mid+1]) <= pos)
106-
left = mid+1;
107-
else
108-
right = mid;
106+
if (freq[mid+1] <= pos) left = mid+1;
107+
else right = mid;
109108
}
110109
return left;
111110
}
112111
};
113112

114113
#ifdef RANGE_CODER_USE_SSE
115114

116-
template<> struct rc_decoder_search_t<short, 256> : public rc_decoder_search_traits_t<short, 256> {
117-
static uint get_index(const freq_type *freq, uint pos) {
115+
template<int _BASE> struct rc_decoder_search_t<short, 256, _BASE> : public rc_decoder_search_traits_t<short, 256, _BASE> {
116+
static uint get_index(const short *freq, short pos) {
118117
__m128i v = _mm_set1_epi16(pos);
119118
unsigned i, mask = 0;
120-
for (i = 0; i < N; i += 16) {
119+
for (i = 0; i < 256; i += 16) {
121120
__m128i x = *reinterpret_cast<const __m128i*>(freq + i);
122121
__m128i y = *reinterpret_cast<const __m128i*>(freq + i + 8);
123122
__m128i a = _mm_cmplt_epi16(v, x);
@@ -147,12 +146,13 @@ template <class Iterator, class SearchType> class rc_decoder_t : public rc_type_
147146
}
148147
uint decode(const uint total, const freq_type* cumFreq) {
149148
const uint r = R / total;
150-
const uint targetPos = std::min(total-1, D / r);
149+
const int targetPos = std::min(total-1, D / r);
151150

152151
//find target s.t. cumFreq[target] <= targetPos < cumFreq[target+1]
153-
const uint target = search_type::get_index(cumFreq, targetPos);
154-
const uint low = cumFreq[target];
155-
const uint high = cumFreq[target+1];
152+
const uint target =
153+
search_type::get_index(cumFreq, targetPos + search_type::BASE);
154+
const uint low = cumFreq[target] - search_type::BASE;
155+
const uint high = cumFreq[target+1] - search_type::BASE;
156156

157157
D -= r * low;
158158
if (high != total) {

0 commit comments

Comments
 (0)