Skip to content

Commit 3e4678e

Browse files
authored
Merge pull request #163 from maxmind/greg/eng-3415
Cache nested structures in Unmarshaler
2 parents c781b92 + 293bf8e commit 3e4678e

File tree

6 files changed

+174
-49
lines changed

6 files changed

+174
-49
lines changed

CHANGELOG.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,14 @@
11
# CHANGELOG
22

3+
## 1.2.0
4+
5+
* The `mmdbtype.Unmarshaler` now caches nested maps and slices,
6+
in addition to top-level values. This improves performance when loading
7+
databases with shared nested data structures.
8+
* The zero value of `mmdbtype.Unmarshaler` is now documented as safe to use
9+
for unmarshaling without caching enabled. Use `NewUnmarshaler()` when you
10+
want caching.
11+
312
## 1.1.0 (2025-10-08)
413

514
* Removed unnecessary deep copies in inserter. GitHub #119.

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ module github.com/maxmind/mmdbwriter
33
go 1.24.0
44

55
require (
6-
github.com/oschwald/maxminddb-golang/v2 v2.0.0
6+
github.com/oschwald/maxminddb-golang/v2 v2.1.0
77
github.com/stretchr/testify v1.11.1
88
go4.org/netipx v0.0.0-20231129151722-fdeea329fbba
99
)

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
22
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
33
github.com/oschwald/maxminddb-golang/v2 v2.0.0 h1:Gyljxck1kHbBxDgLM++NfDWBqvu1pWWfT8XbosSo0bo=
44
github.com/oschwald/maxminddb-golang/v2 v2.0.0/go.mod h1:gG4V88LsawPEqtbL1Veh1WRh+nVSYwXzJ1P5Fcn77g0=
5+
github.com/oschwald/maxminddb-golang/v2 v2.1.0 h1:2Iv7lmG9XtxuZA/jFAsd7LnZaC1E59pFsj5O/nU15pw=
6+
github.com/oschwald/maxminddb-golang/v2 v2.1.0/go.mod h1:gG4V88LsawPEqtbL1Veh1WRh+nVSYwXzJ1P5Fcn77g0=
57
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
68
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
79
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=

mmdbtype/types.go

Lines changed: 88 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,8 @@ func (t Int32) WriteTo(w writer) (int64, error) {
295295
}
296296

297297
// Map is the MaxMind DB map type.
298+
//
299+
//nolint:recvcheck // preexisting/interface
298300
type Map map[String]DataType
299301

300302
var _ DataType = Map(nil)
@@ -341,6 +343,11 @@ func (t Map) typeNum() typeNum {
341343

342344
// UnmarshalMaxMindDB implements the mmdbdata.Unmarshaler interface.
343345
func (t *Map) UnmarshalMaxMindDB(decoder *mmdbdata.Decoder) error {
346+
return t.unmarshalMaxMindDB(decoder, nil)
347+
}
348+
349+
// unmarshalMaxMindDB is the internal implementation that supports caching.
350+
func (t *Map) unmarshalMaxMindDB(decoder *mmdbdata.Decoder, cache map[uint]DataType) error {
344351
iter, size, err := decoder.ReadMap()
345352
if err != nil {
346353
return fmt.Errorf("reading Map: %w", err)
@@ -352,7 +359,7 @@ func (t *Map) UnmarshalMaxMindDB(decoder *mmdbdata.Decoder) error {
352359
return iterErr
353360
}
354361

355-
value, err := decodeDataTypeValue(decoder)
362+
value, err := decodeDataTypeValue(decoder, cache)
356363
if err != nil {
357364
return err
358365
}
@@ -511,6 +518,8 @@ func (t Pointer) WriteTo(w writer) (int64, error) {
511518
}
512519

513520
// Slice is the MaxMind DB array type.
521+
//
522+
//nolint:recvcheck // preexisting/interface
514523
type Slice []DataType
515524

516525
var _ DataType = Slice(nil)
@@ -557,6 +566,11 @@ func (t Slice) typeNum() typeNum {
557566

558567
// UnmarshalMaxMindDB implements the mmdbdata.Unmarshaler interface.
559568
func (t *Slice) UnmarshalMaxMindDB(decoder *mmdbdata.Decoder) error {
569+
return t.unmarshalMaxMindDB(decoder, nil)
570+
}
571+
572+
// unmarshalMaxMindDB is the internal implementation that supports caching.
573+
func (t *Slice) unmarshalMaxMindDB(decoder *mmdbdata.Decoder, cache map[uint]DataType) error {
560574
iter, size, err := decoder.ReadSlice()
561575
if err != nil {
562576
return fmt.Errorf("reading Slice: %w", err)
@@ -568,7 +582,7 @@ func (t *Slice) UnmarshalMaxMindDB(decoder *mmdbdata.Decoder) error {
568582
return iterErr
569583
}
570584

571-
value, err := decodeDataTypeValue(decoder)
585+
value, err := decodeDataTypeValue(decoder, cache)
572586
if err != nil {
573587
return err
574588
}
@@ -939,63 +953,98 @@ func writeCtrlByte(w writer, t DataType) (int64, error) {
939953
return numBytes, nil
940954
}
941955

956+
// isCacheableKind returns true if the given kind is worth caching. Currently,
957+
// we have primarily found a benefit with containers, not scalar values.
958+
// Potentially, longer strings may also benefit from caching, although it
959+
// may be even better to just intern them, either here or directly in
960+
// the maxminddb reader.
961+
func isCacheableKind(kind mmdbdata.Kind) bool {
962+
return kind == mmdbdata.KindMap || kind == mmdbdata.KindSlice
963+
}
964+
942965
// decodeDataTypeValue decodes a value from the decoder and returns the appropriate DataType.
943-
func decodeDataTypeValue(decoder *mmdbdata.Decoder) (DataType, error) {
966+
// If cache is provided (non-nil), it will check for cached values at the current decoder offset
967+
// and store newly decoded container types (Map, Slice) in the cache. Simple scalar types
968+
// are not cached as they are cheap to decode and caching them would waste memory.
969+
func decodeDataTypeValue(decoder *mmdbdata.Decoder, cache map[uint]DataType) (DataType, error) {
944970
kind, err := decoder.PeekKind()
945971
if err != nil {
946972
return nil, fmt.Errorf("peeking kind: %w", err)
947973
}
948974

975+
// Only check cache if provided and the type is worth caching
976+
// This avoids unnecessary map lookups for scalar types in tight loops
977+
useCache := cache != nil && isCacheableKind(kind)
978+
var offset uint
979+
if useCache {
980+
offset = decoder.Offset()
981+
if cached, ok := cache[offset]; ok {
982+
return cached, nil
983+
}
984+
}
985+
986+
var value DataType
949987
switch kind {
950988
case mmdbdata.KindString:
951-
var value String
952-
err := value.UnmarshalMaxMindDB(decoder)
953-
return value, err
989+
var v String
990+
err = v.UnmarshalMaxMindDB(decoder)
991+
value = v
954992
case mmdbdata.KindFloat64:
955-
var value Float64
956-
err := value.UnmarshalMaxMindDB(decoder)
957-
return value, err
993+
var v Float64
994+
err = v.UnmarshalMaxMindDB(decoder)
995+
value = v
958996
case mmdbdata.KindBytes:
959-
var value Bytes
960-
err := value.UnmarshalMaxMindDB(decoder)
961-
return value, err
997+
var v Bytes
998+
err = v.UnmarshalMaxMindDB(decoder)
999+
value = v
9621000
case mmdbdata.KindUint16:
963-
var value Uint16
964-
err := value.UnmarshalMaxMindDB(decoder)
965-
return value, err
1001+
var v Uint16
1002+
err = v.UnmarshalMaxMindDB(decoder)
1003+
value = v
9661004
case mmdbdata.KindUint32:
967-
var value Uint32
968-
err := value.UnmarshalMaxMindDB(decoder)
969-
return value, err
1005+
var v Uint32
1006+
err = v.UnmarshalMaxMindDB(decoder)
1007+
value = v
9701008
case mmdbdata.KindInt32:
971-
var value Int32
972-
err := value.UnmarshalMaxMindDB(decoder)
973-
return value, err
1009+
var v Int32
1010+
err = v.UnmarshalMaxMindDB(decoder)
1011+
value = v
9741012
case mmdbdata.KindUint64:
975-
var value Uint64
976-
err := value.UnmarshalMaxMindDB(decoder)
977-
return value, err
1013+
var v Uint64
1014+
err = v.UnmarshalMaxMindDB(decoder)
1015+
value = v
9781016
case mmdbdata.KindUint128:
979-
var value Uint128
980-
err := value.UnmarshalMaxMindDB(decoder)
981-
return &value, err // Return pointer for Uint128
1017+
var v Uint128
1018+
err = v.UnmarshalMaxMindDB(decoder)
1019+
value = &v // Return pointer for Uint128
9821020
case mmdbdata.KindBool:
983-
var value Bool
984-
err := value.UnmarshalMaxMindDB(decoder)
985-
return value, err
1021+
var v Bool
1022+
err = v.UnmarshalMaxMindDB(decoder)
1023+
value = v
9861024
case mmdbdata.KindFloat32:
987-
var value Float32
988-
err := value.UnmarshalMaxMindDB(decoder)
989-
return value, err
1025+
var v Float32
1026+
err = v.UnmarshalMaxMindDB(decoder)
1027+
value = v
9901028
case mmdbdata.KindMap:
991-
var value Map
992-
err := value.UnmarshalMaxMindDB(decoder)
993-
return value, err
1029+
var v Map
1030+
err = v.unmarshalMaxMindDB(decoder, cache)
1031+
value = v
9941032
case mmdbdata.KindSlice:
995-
var value Slice
996-
err := value.UnmarshalMaxMindDB(decoder)
997-
return value, err
1033+
var v Slice
1034+
err = v.unmarshalMaxMindDB(decoder, cache)
1035+
value = v
9981036
default:
9991037
return nil, fmt.Errorf("unsupported data type: %v", kind)
10001038
}
1039+
1040+
if err != nil {
1041+
return nil, err
1042+
}
1043+
1044+
// Store the decoded value in cache.
1045+
if useCache {
1046+
cache[offset] = value
1047+
}
1048+
1049+
return value, nil
10011050
}

mmdbtype/unmarshaler.go

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,23 @@ import (
77
// Unmarshaler implements the mmdbdata.Unmarshaler interface for converting
88
// MMDB data back into mmdbtype.DataType values. This is used when loading
99
// existing MMDB files to reconstruct the original data structures.
10+
//
11+
// The Unmarshaler caches decoded container types (Map, Slice) at all
12+
// nesting levels to improve performance when loading databases with shared
13+
// nested data structures. Simple scalar types are not cached as they are
14+
// cheap to decode.
15+
//
16+
// The zero value of Unmarshaler is safe to use and will unmarshal data
17+
// without caching. Use NewUnmarshaler() to create an Unmarshaler with
18+
// caching enabled for better performance when loading full databases.
1019
type Unmarshaler struct {
1120
cache map[uint]DataType
1221
result DataType
1322
}
1423

15-
// NewUnmarshaler creates a new Unmarshaler for converting MMDB data to mmdbtype values.
24+
// NewUnmarshaler creates a new Unmarshaler with caching enabled for converting
25+
// MMDB data to mmdbtype values. The cache improves performance when loading
26+
// databases with shared data structures by avoiding redundant decoding.
1627
func NewUnmarshaler() *Unmarshaler {
1728
return &Unmarshaler{
1829
cache: map[uint]DataType{},
@@ -21,18 +32,11 @@ func NewUnmarshaler() *Unmarshaler {
2132

2233
// UnmarshalMaxMindDB implements the mmdbdata.Unmarshaler interface.
2334
func (u *Unmarshaler) UnmarshalMaxMindDB(decoder *mmdbdata.Decoder) error {
24-
offset := decoder.Offset()
25-
if cached, ok := u.cache[offset]; ok {
26-
u.result = cached
27-
return nil
28-
}
29-
30-
value, err := decodeDataTypeValue(decoder)
35+
value, err := decodeDataTypeValue(decoder, u.cache)
3136
if err != nil {
3237
return err
3338
}
3439

35-
u.cache[offset] = value
3640
u.result = value
3741
return nil
3842
}

mmdbtype/unmarshaler_cache_test.go

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
package mmdbtype
2+
3+
import (
4+
"bytes"
5+
"testing"
6+
7+
"github.com/oschwald/maxminddb-golang/v2/mmdbdata"
8+
"github.com/stretchr/testify/assert"
9+
"github.com/stretchr/testify/require"
10+
)
11+
12+
// TestUnmarshalerCache verifies that the Unmarshaler has a cache field
13+
// and that NewUnmarshaler initializes it correctly.
14+
func TestUnmarshalerCache(t *testing.T) {
15+
u := NewUnmarshaler()
16+
assert.NotNil(t, u)
17+
18+
// The cache should be initialized (non-nil)
19+
assert.NotNil(t, u.cache)
20+
}
21+
22+
// TestZeroUnmarshaler verifies that the zero value of Unmarshaler is safe to use
23+
// and correctly unmarshals data without caching enabled.
24+
func TestZeroUnmarshaler(t *testing.T) {
25+
// Create test data with nested structures
26+
testData := Map{
27+
"outer": Map{
28+
"inner": Slice{
29+
String("value1"),
30+
String("value2"),
31+
Map{
32+
"deep": String("nested"),
33+
"num": Uint32(42),
34+
},
35+
},
36+
"other": Uint64(100),
37+
},
38+
"simple": String("test"),
39+
}
40+
41+
// Encode to bytes
42+
var buf bytes.Buffer
43+
dw := &dataWriter{Buffer: &buf}
44+
_, err := testData.WriteTo(dw)
45+
require.NoError(t, err)
46+
47+
var zeroUnmarshaler Unmarshaler
48+
decoder := mmdbdata.NewDecoder(buf.Bytes(), 0)
49+
err = zeroUnmarshaler.UnmarshalMaxMindDB(decoder)
50+
require.NoError(t, err)
51+
52+
decoded := zeroUnmarshaler.Result()
53+
require.NotNil(t, decoded)
54+
55+
// Verify the decoded data matches the original
56+
assert.True(t, testData.Equal(decoded), "decoded data should match original")
57+
58+
// Verify we can call Clear on zero value (should be safe)
59+
zeroUnmarshaler.Clear()
60+
assert.Nil(t, zeroUnmarshaler.Result())
61+
}

0 commit comments

Comments
 (0)