Add float64 codec tests (#299)

tigrannajaryan · web-flow · commit 9d3640c8068f · 2025-12-18T09:46:45.000-05:00
Resolves #292 - Added tests to Go and Java implementation. - Removed unnecessary code in the implementation. - Update spec description.
diff --git a/go/pkg/encoders/float64.go b/go/pkg/encoders/float64.go
@@ -48,9 +48,9 @@ func (e *Float64Encoder) Encode(val float64) {
 	prevTrailing := e.trailingBits
 	sigbits := 64 - leading - trailing
 
-	if prevLeading != -1 && leading >= prevLeading && trailing >= prevTrailing {
+	if leading >= prevLeading && trailing >= prevTrailing {
 		// Fits in previous [leading..trailing] range.
-		if 53-prevLeading-prevTrailing < sigbits {
+		if 53-prevLeading-prevTrailing <= sigbits {
 			// Current scheme is smaller than trying reset the range. Use the current scheme.
 			e.buf.WriteBits(0b10, 2)
 			bitCount := uint(64 - prevLeading - prevTrailing)
diff --git a/go/pkg/encoders/float64_test.go b/go/pkg/encoders/float64_test.go
@@ -0,0 +1,150 @@
+package encoders
+
+import (
+	"fmt"
+	"math"
+	"math/rand/v2"
+	"testing"
+	"time"
+
+	"github.com/splunk/stef/go/pkg"
+)
+
+func equalFloat(a, b float64) bool {
+	if math.IsNaN(a) && math.IsNaN(b) {
+		return true
+	}
+	return a == b
+}
+
+func verifyDecode(t *testing.T, values []float64) {
+	// Encode
+	var buf pkg.BitsWriter
+	limiter := &pkg.SizeLimiter{}
+	enc := &Float64Encoder{buf: buf, limiter: limiter}
+
+	for _, v := range values {
+		enc.Encode(v)
+	}
+
+	// Get encoded bytes
+	var bytesBuf pkg.BytesWriter
+	enc.buf.Close()
+	enc.WriteTo(&bytesBuf)
+	encoded := bytesBuf.Bytes()
+
+	// Decode
+	dec := &Float64Decoder{}
+	dec.buf.Reset(encoded)
+
+	for i, want := range values {
+		var got float64
+		if err := dec.Decode(&got); err != nil {
+			t.Fatalf("decode failed at %d: %v", i, err)
+		}
+		if !equalFloat(got, want) {
+			t.Errorf(
+				"mismatch at %d: got %v (bits 0x%x), want %v (bits 0x%x)", i, got, math.Float64bits(got), want,
+				math.Float64bits(want),
+			)
+		}
+	}
+}
+
+func TestFloat64EncoderDecoder_Basic(t *testing.T) {
+	values := []float64{1.0, 1.0, 2.0, 2.0, 3.1415, 3.1415, -1.0, 0.0, -0.0, math.NaN(), math.Inf(1), math.Inf(-1)}
+	verifyDecode(t, values)
+}
+
+func TestFloat64EncoderDecoder_Bits(t *testing.T) {
+	bits := []uint64{
+		0x000000000000,
+		0x0FFFFFFFFFF0,
+		0x0F0000000000,
+		0x0F0000000F00,
+		0,
+		^uint64(0),
+		0,
+	}
+
+	values := []float64{}
+	for _, bitVal := range bits {
+		values = append(values, math.Float64frombits(bitVal))
+	}
+
+	verifyDecode(t, values)
+}
+
+func TestFloat64EncoderDecoder_LeadingTrailing(t *testing.T) {
+	// Values with only one bit difference, to test leading/trailing zeros logic
+	base := math.Float64frombits(0x3FF0000000000000) // 1.0
+	values := []float64{base}
+	for i := 0; i < 64; i++ {
+		values = append(values, math.Float64frombits(math.Float64bits(base)^(1<<uint(i))))
+	}
+	verifyDecode(t, values)
+}
+
+func randFloat(random *rand.Rand) float64 {
+	// Generate random float64, including negative values
+	v := random.Float64()*1e10 - 5e9 // range: [-5e9, +5e9)
+	// Occasionally insert special values
+	rval := random.IntN(50)
+	if rval == 0 {
+		v = math.NaN()
+	} else if rval == 1 {
+		v = math.Inf(1)
+	} else if rval == 2 {
+		v = math.Inf(-1)
+	}
+	return v
+}
+
+func TestFloat64EncoderDecoder_RandomSequence(t *testing.T) {
+	randSeed := uint64(time.Now().UnixNano())
+	fmt.Printf("Using random seed: %d\n", randSeed)
+	random := rand.New(rand.NewPCG(randSeed, 0))
+
+	n := 1 + random.IntN(2000)
+
+	var buf pkg.BitsWriter
+	limiter := &pkg.SizeLimiter{}
+	enc := &Float64Encoder{buf: buf, limiter: limiter}
+
+	for range n {
+		v := randFloat(random)
+		enc.Encode(v)
+		if random.IntN(100) == 0 {
+			enc.Reset()
+		}
+	}
+
+	// Get encoded bytes
+	var bytesBuf pkg.BytesWriter
+	enc.buf.Close()
+	enc.WriteTo(&bytesBuf)
+	encoded := bytesBuf.Bytes()
+
+	dec := &Float64Decoder{}
+	dec.buf.Reset(encoded)
+
+	random = rand.New(rand.NewPCG(randSeed, 0))
+	random.IntN(2000)
+
+	for i := range n {
+		want := randFloat(random)
+		var got float64
+		if err := dec.Decode(&got); err != nil {
+			t.Fatalf("decode failed at %d: %v", i, err)
+		}
+		if !equalFloat(got, want) {
+			t.Errorf(
+				"mismatch at %d: got %v (bits 0x%x), want %v (bits 0x%x)", i, got, math.Float64bits(got), want,
+				math.Float64bits(want),
+			)
+		}
+		if random.IntN(100) == 0 {
+			dec.Reset()
+		}
+	}
+}
diff --git a/java/src/main/java/net/stef/codecs/Float64Encoder.java b/java/src/main/java/net/stef/codecs/Float64Encoder.java
@@ -42,9 +42,9 @@ public void encode(double val) {
         int prevTrailing = trailingBits;
         int sigbits = 64 - leading - trailing;
 
-        if (leadingBits != -1 && leading >= leadingBits && trailing >= trailingBits) {
+        if (leading >= leadingBits && trailing >= trailingBits) {
             // Fits in previous [leading..trailing] range.
-            if (53-prevLeading-prevTrailing < sigbits) {
+            if (53-prevLeading-prevTrailing <= sigbits) {
                 // Current scheme is smaller than trying reset the range. Use the current scheme.
                 buf.writeBits(0b10, 2);
                 buf.writeBits(xorVal>>>prevTrailing, 64-prevLeading-prevTrailing);
diff --git a/stef-spec/specification.md b/stef-spec/specification.md
@@ -875,7 +875,31 @@ PrevVal = CurVal
 ### Float64 Codec
 
 64-bit IEEE numbers are encoded using 
-[Gorilla encoding](https://www.vldb.org/pvldb/vol8/p1816-teller.pdf) (section 4.1.2)
+[Gorilla encoding](https://www.vldb.org/pvldb/vol8/p1816-teller.pdf) (section 4.1.2) with
+the following state of the encoder/decoder:
+
+- The "previous" value at the start and at the codec reset is initialized to 0.0.
+- The "previous leading zeros" and "previous trailing zeros" values at the start
+  and at the codec reset are initialized to 0.
+
+We also modify the encoding with the following optimization for case 3 of the encoding
+scheme. The original algorithm says:
+
+>If the block of meaningful bits falls within the block of previous meaningful bits,
+>i.e., there are at least as many leading zeros and as many trailing zeros as with
+>the previous value, use that information for the block position and
+>just store the meaningful XORed value.
+
+We modify this clause in the following way:
+
+If the block of meaningful bits falls within the block of previous meaningful bits,
+i.e., there are at least as many leading zeros and as many trailing zeros as with
+the previous value, calculate the total number of bits required for encoding via schema 3a
+(Control bit '0') and for encoding via schema 3b (Control bit '1') and choose the
+schema that needs fewer total bits. The net result of this optimization is that if the
+condition `53-prevLeading-prevTrailing > sigbits` is true (where `sigbits` is the number of
+meaningful bits in the current XOR value), it is beneficial to switch to schema 3b with new values
+of leading and trailing zeros even though the XOR value fits in the previous values of leading and trailing zeros.
 
 ### Bool Codec