Skip to content

Commit 9d3640c

Browse files
Add float64 codec tests (#299)
Resolves #292 - Added tests to Go and Java implementation. - Removed unnecessary code in the implementation. - Update spec description.
1 parent 52df499 commit 9d3640c

File tree

4 files changed

+179
-5
lines changed

4 files changed

+179
-5
lines changed

go/pkg/encoders/float64.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,9 @@ func (e *Float64Encoder) Encode(val float64) {
4848
prevTrailing := e.trailingBits
4949
sigbits := 64 - leading - trailing
5050

51-
if prevLeading != -1 && leading >= prevLeading && trailing >= prevTrailing {
51+
if leading >= prevLeading && trailing >= prevTrailing {
5252
// Fits in previous [leading..trailing] range.
53-
if 53-prevLeading-prevTrailing < sigbits {
53+
if 53-prevLeading-prevTrailing <= sigbits {
5454
// Current scheme is smaller than trying reset the range. Use the current scheme.
5555
e.buf.WriteBits(0b10, 2)
5656
bitCount := uint(64 - prevLeading - prevTrailing)

go/pkg/encoders/float64_test.go

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
package encoders
2+
3+
import (
4+
"fmt"
5+
"math"
6+
"math/rand/v2"
7+
"testing"
8+
"time"
9+
10+
"github.com/splunk/stef/go/pkg"
11+
)
12+
13+
// equalFloat reports whether a and b should be treated as the same value by
// the round-trip tests. Two NaNs compare equal here, which plain == would
// reject. All other values are compared with ==, so +0 and -0 are also
// considered equal and a lost zero sign would not be detected.
func equalFloat(a, b float64) bool {
14+
if math.IsNaN(a) && math.IsNaN(b) {
15+
return true
16+
}
17+
return a == b
18+
}
19+
20+
// verifyDecode round-trips values through the codec: it encodes every value
// with a Float64Encoder, copies the encoded output into a BytesWriter, then
// decodes len(values) values with a Float64Decoder and compares each one with
// the input using equalFloat.
//
// A decode error stops the test immediately (t.Fatalf). A value mismatch is
// reported with t.Errorf, so all mismatching indices are listed in one run.
func verifyDecode(t *testing.T, values []float64) {
21+
// Encode every value, in order, with a single encoder instance.
22+
var buf pkg.BitsWriter
23+
limiter := &pkg.SizeLimiter{}
24+
enc := &Float64Encoder{buf: buf, limiter: limiter}
25+
26+
for _, v := range values {
27+
enc.Encode(v)
28+
}
29+
30+
// Get encoded bytes. The bit writer is closed before its content is copied
// out (presumably to flush pending bits; confirm in pkg.BitsWriter).
31+
var bytesBuf pkg.BytesWriter
32+
enc.buf.Close()
33+
enc.WriteTo(&bytesBuf)
34+
encoded := bytesBuf.Bytes()
35+
36+
// Decode the same number of values from the encoded bytes and compare.
37+
dec := &Float64Decoder{}
38+
dec.buf.Reset(encoded)
39+
40+
for i, want := range values {
41+
var got float64
42+
if err := dec.Decode(&got); err != nil {
43+
t.Fatalf("decode failed at %d: %v", i, err)
44+
}
45+
if !equalFloat(got, want) {
46+
t.Errorf(
47+
"mismatch at %d: got %v (bits 0x%x), want %v (bits 0x%x)", i, got, math.Float64bits(got), want,
48+
math.Float64bits(want),
49+
)
50+
}
51+
}
52+
}
53+
54+
// TestFloat64EncoderDecoder_Basic round-trips a short hand-written sequence
// through verifyDecode: repeated values, a sign change, both zeros, NaN and
// both infinities.
func TestFloat64EncoderDecoder_Basic(t *testing.T) {
55+
// Negative zero is built with math.Copysign because the Go constant
// expression -0.0 evaluates to positive zero and would just repeat 0.0.
values := []float64{1.0, 1.0, 2.0, 2.0, 3.1415, 3.1415, -1.0, 0.0, math.Copysign(0, -1), math.NaN(), math.Inf(1), math.Inf(-1)}
56+
verifyDecode(t, values)
57+
}
58+
59+
// TestFloat64EncoderDecoder_Bits round-trips values built directly from raw
// 64-bit patterns with math.Float64frombits, including the all-zero and
// all-one patterns, and checks them with verifyDecode.
func TestFloat64EncoderDecoder_Bits(t *testing.T) {
60+
bits := []uint64{
61+
0x000000000000,
62+
0x0FFFFFFFFFF0,
63+
0x0F0000000000,
64+
0x0F0000000F00,
65+
0,
66+
^uint64(0),
67+
0,
68+
}
69+
70+
// Reinterpret each bit pattern as a float64 without any numeric conversion.
values := []float64{}
71+
for _, bitVal := range bits {
72+
values = append(values, math.Float64frombits(bitVal))
73+
}
74+
75+
verifyDecode(t, values)
76+
}
77+
78+
// TestFloat64EncoderDecoder_LeadingTrailing round-trips 1.0 followed by 64
// values, each equal to 1.0 with exactly one bit flipped (bit 0 through bit
// 63 in order), and checks them with verifyDecode.
func TestFloat64EncoderDecoder_LeadingTrailing(t *testing.T) {
79+
// Values with only one bit difference, to test leading/trailing zeros logic
80+
base := math.Float64frombits(0x3FF0000000000000) // 1.0
81+
values := []float64{base}
82+
for i := 0; i < 64; i++ {
83+
values = append(values, math.Float64frombits(math.Float64bits(base)^(1<<uint(i))))
84+
}
85+
verifyDecode(t, values)
86+
}
87+
88+
// randFloat returns the next test value drawn from random. Most results are
// uniform in [-5e9, +5e9). When the IntN(50) draw is 0, 1 or 2 the result is
// NaN, +Inf or -Inf respectively.
//
// Every call consumes exactly one Float64 and one IntN(50) from random,
// whichever value is returned. This keeps a generator re-created from the same
// seed in step when the sequence is replayed.
func randFloat(random *rand.Rand) float64 {
89+
// Generate random float64, including negative values
90+
v := random.Float64()*1e10 - 5e9 // range: [-5e9, +5e9)
91+
// Occasionally insert special values
92+
rval := random.IntN(50)
93+
if rval == 0 {
94+
v = math.NaN()
95+
} else if rval == 1 {
96+
v = math.Inf(1)
97+
} else if rval == 2 {
98+
v = math.Inf(-1)
99+
}
100+
return v
101+
}
102+
103+
// TestFloat64EncoderDecoder_RandomSequence round-trips a random sequence of
// 1 to 2000 values produced by randFloat. After each encoded value the encoder
// is reset when an IntN(100) draw is 0. The decoding pass re-creates the
// generator from the same seed, so it replays the same values and the same
// reset decisions against the decoder.
//
// The seed comes from the current time and is printed so that a failing run
// can be reproduced.
func TestFloat64EncoderDecoder_RandomSequence(t *testing.T) {
104+
randSeed := uint64(time.Now().UnixNano())
105+
fmt.Printf("Using random seed: %d\n", randSeed)
106+
random := rand.New(rand.NewPCG(randSeed, 0))
107+
108+
// Number of values to encode: 1..2000.
n := 1 + random.IntN(2000)
109+
110+
var buf pkg.BitsWriter
111+
limiter := &pkg.SizeLimiter{}
112+
enc := &Float64Encoder{buf: buf, limiter: limiter}
113+
114+
for range n {
115+
v := randFloat(random)
116+
enc.Encode(v)
117+
// Reset the encoder after this value when the draw is 0.
if random.IntN(100) == 0 {
118+
enc.Reset()
119+
}
120+
}
121+
122+
// Get encoded bytes
123+
var bytesBuf pkg.BytesWriter
124+
enc.buf.Close()
125+
enc.WriteTo(&bytesBuf)
126+
encoded := bytesBuf.Bytes()
127+
128+
dec := &Float64Decoder{}
129+
dec.buf.Reset(encoded)
130+
131+
// Restart the generator from the same seed and discard the draw that
// produced n, so the replayed stream lines up with the encoding pass.
random = rand.New(rand.NewPCG(randSeed, 0))
132+
random.IntN(2000)
133+
134+
for i := range n {
135+
want := randFloat(random)
136+
var got float64
137+
if err := dec.Decode(&got); err != nil {
138+
t.Fatalf("decode failed at %d: %v", i, err)
139+
}
140+
if !equalFloat(got, want) {
141+
t.Errorf(
142+
"mismatch at %d: got %v (bits 0x%x), want %v (bits 0x%x)", i, got, math.Float64bits(got), want,
143+
math.Float64bits(want),
144+
)
145+
}
146+
// Mirror the encoder: reset the decoder at the same positions.
if random.IntN(100) == 0 {
147+
dec.Reset()
148+
}
149+
}
150+
}

java/src/main/java/net/stef/codecs/Float64Encoder.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,9 @@ public void encode(double val) {
4242
int prevTrailing = trailingBits;
4343
int sigbits = 64 - leading - trailing;
4444

45-
if (leadingBits != -1 && leading >= leadingBits && trailing >= trailingBits) {
45+
if (leading >= leadingBits && trailing >= trailingBits) {
4646
// Fits in previous [leading..trailing] range.
47-
if (53-prevLeading-prevTrailing < sigbits) {
47+
if (53-prevLeading-prevTrailing <= sigbits) {
4848
// Current scheme is smaller than trying reset the range. Use the current scheme.
4949
buf.writeBits(0b10, 2);
5050
buf.writeBits(xorVal>>>prevTrailing, 64-prevLeading-prevTrailing);

stef-spec/specification.md

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -875,7 +875,31 @@ PrevVal = CurVal
875875
### Float64 Codec
876876

877877
64-bit IEEE numbers are encoded using
878-
[Gorilla encoding](https://www.vldb.org/pvldb/vol8/p1816-teller.pdf) (section 4.1.2)
878+
[Gorilla encoding](https://www.vldb.org/pvldb/vol8/p1816-teller.pdf) (section 4.1.2) with
879+
the following state of the encoder/decoder:
880+
881+
- The "previous" value at the start and at the codec reset is initialized to 0.0.
882+
- The "previous leading zeros" and "previous trailing zeros" values at the start
883+
and at the codec reset are initialized to 0.
884+
885+
We also modify the encoding with the following optimization for case 3 of the encoding
886+
scheme. The original algorithm says:
887+
888+
>If the block of meaningful bits falls within the block of previous meaningful bits,
889+
>i.e., there are at least as many leading zeros and as many trailing zeros as with
890+
>the previous value, use that information for the block position and
891+
>just store the meaningful XORed value.
892+
893+
We modify this clause in the following way:
894+
895+
If the block of meaningful bits falls within the block of previous meaningful bits,
896+
i.e., there are at least as many leading zeros and as many trailing zeros as with
897+
the previous value, calculate the total number of bits required for encoding via schema 3a
898+
(Control bit '0') and for encoding via schema 3b (Control bit '1') and choose the
899+
schema that needs fewer total bits. The net result of this optimization is
900+
that if the condition `53-prevLeading-prevTrailing > sigbits` is true it is beneficial
901+
to switch to schema 3b with new values of leading and trailing zeros even though
902+
the XOR value fits in the previous values of leading and trailing zeros.
879903

880904
### Bool Codec
881905

0 commit comments

Comments
 (0)