Skip to content
Open
5 changes: 4 additions & 1 deletion encoding/thrift/encode.go
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,10 @@ encodeFields:
}
}

- if !f.flags.have(required) && x.IsZero() {
+ // Always write optional fields for regular structs, even if zero-valued.
+ // This ensures fields like NullCount=0 are present in Parquet statistics.
+ // However, unions must skip zero values to maintain exactly one field.
+ if !f.flags.have(required) && x.IsZero() && enc.union {
continue encodeFields
}

Expand Down
6 changes: 5 additions & 1 deletion format/parquet_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,11 @@ func TestMarshalUnmarshalSchemaMetadata(t *testing.T) {
Name: "hello",
},
},
- RowGroups: []format.RowGroup{},
+ RowGroups: []format.RowGroup{},
+ KeyValueMetadata: []format.KeyValue{},
+ CreatedBy: "",
+ ColumnOrders: []format.ColumnOrder{},
+ FooterSigningKeyMetadata: []byte{},
}

b, err := thrift.Marshal(protocol, metadata)
Expand Down
62 changes: 62 additions & 0 deletions nullcount_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package parquet_test

import (
"bytes"
"testing"

"github.com/parquet-go/parquet-go"
)

// TestNullCountAlwaysWritten writes a small file whose rows contain no nulls,
// reopens it from memory, and checks that every column chunk of every row
// group reports NullCount == 0 in its statistics. The motivation is
// compatibility with strict Parquet consumers like Snowflake that require
// explicit null counts.
//
// NOTE(review): the check reads NullCount from the decoded metadata. If that
// field is a plain integer, an absent field would also decode as 0 and this
// test could not tell "written as 0" from "not written" — confirm against the
// format.Statistics definition, and consider asserting on the encoded footer
// bytes if a stricter guarantee is needed.
func TestNullCountAlwaysWritten(t *testing.T) {
	type Row struct {
		ID   int64  `parquet:"id"`
		Name string `parquet:"name"`
	}

	rows := []Row{
		{ID: 1, Name: "Alice"},
		{ID: 2, Name: "Bob"},
		{ID: 3, Name: "Charlie"},
	}

	buf := new(bytes.Buffer)
	writer := parquet.NewGenericWriter[Row](buf)

	if _, err := writer.Write(rows); err != nil {
		t.Fatalf("failed to write rows: %v", err)
	}

	// Close flushes the row group and writes the footer; the file is not
	// readable until this succeeds.
	if err := writer.Close(); err != nil {
		t.Fatalf("failed to close writer: %v", err)
	}

	// Read the file back from the in-memory buffer.
	fileBytes := buf.Bytes()
	pf, err := parquet.OpenFile(bytes.NewReader(fileBytes), int64(len(fileBytes)))
	if err != nil {
		t.Fatalf("failed to open parquet file: %v", err)
	}

	metadata := pf.Metadata()
	if len(metadata.RowGroups) == 0 {
		t.Fatal("No row groups found")
	}

	for rgIdx, rowGroup := range metadata.RowGroups {
		// Guard against a vacuous pass: with no column chunks the inner loop
		// would check nothing and the test would silently succeed.
		if len(rowGroup.Columns) == 0 {
			t.Errorf("RowGroup[%d]: no column chunks found", rgIdx)
			continue
		}

		for colIdx, col := range rowGroup.Columns {
			// None of the written rows contain nulls, so every column chunk
			// is expected to report a null count of zero.
			nullCount := col.MetaData.Statistics.NullCount
			if nullCount != 0 {
				t.Errorf("RowGroup[%d].Column[%d]: expected NullCount=0, got %d",
					rgIdx, colIdx, nullCount)
				// Skip the success log below so the output never shows a
				// check mark for a column that just failed.
				continue
			}
			t.Logf("RowGroup[%d].Column[%d]: NullCount=%d ✓", rgIdx, colIdx, nullCount)
		}
	}
}
Loading