Skip to content

Commit 007a37e

Browse files
authored
opt:(utils) add parse option ForceHashMapAsFieldNameMap to reduce m… (#112)
1 parent 317b54f commit 007a37e

File tree

4 files changed

+127
-86
lines changed

4 files changed

+127
-86
lines changed

internal/util/fieldmap.go

Lines changed: 81 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,19 @@ type FieldNameMap struct {
3535
maxKeyLength int
3636
all []caching.Pair
3737
trie *caching.TrieTree
38-
hash *caching.HashMap
38+
hash map[string]unsafe.Pointer
3939
}
4040

41-
// Set sets the field descriptor for the given key
41+
func NewFieldNameMap() *FieldNameMap {
42+
return &FieldNameMap{
43+
hash: make(map[string]unsafe.Pointer, defaultMapSize),
44+
}
45+
}
46+
47+
// Set sets the field descriptor for the given key.
48+
//
49+
// NOTICE: It sets to the hash map by default. If the user wants to use a trie tree,
50+
// please call Build() after all Set() calls.
4251
func (ft *FieldNameMap) Set(key string, field unsafe.Pointer) (exist bool) {
4352
if len(key) > ft.maxKeyLength {
4453
ft.maxKeyLength = len(key)
@@ -59,7 +68,7 @@ func (ft FieldNameMap) Get(k string) unsafe.Pointer {
5968
if ft.trie != nil {
6069
return (unsafe.Pointer)(ft.trie.Get(k))
6170
} else if ft.hash != nil {
62-
return (unsafe.Pointer)(ft.hash.Get(k))
71+
return (unsafe.Pointer)(ft.hash[k])
6372
}
6473
return nil
6574
}
@@ -72,7 +81,7 @@ func (ft FieldNameMap) All() []caching.Pair {
7281
// Size returns the size of the map
7382
func (ft FieldNameMap) Size() int {
7483
if ft.hash != nil {
75-
return ft.hash.Size()
84+
return len(ft.hash)
7685
} else if ft.trie != nil {
7786
return ft.trie.Size()
7887
}
@@ -81,88 +90,90 @@ func (ft FieldNameMap) Size() int {
8190

8291
// Build builds the map.
8392
// It will try to build a trie tree if the dispersion of keys is high enough (min).
84-
func (ft *FieldNameMap) Build() {
93+
func (ft *FieldNameMap) Build(noTrieTree bool) {
8594
if len(ft.all) == 0 {
8695
return
8796
}
8897

8998
var empty unsafe.Pointer
9099

91-
// collect the distribution statistics for each position:
92-
// - primary slice store the position as its index
93-
// - secondary map used to merge values with same char at the same position
94-
var positionDispersion = make([]map[byte][]int, ft.maxKeyLength)
100+
if !noTrieTree {
101+
// collect the distribution statistics for each position:
102+
// - primary slice store the position as its index
103+
// - secondary map used to merge values with same char at the same position
104+
var positionDispersion = make([]map[byte][]int, ft.maxKeyLength)
105+
106+
for i, v := range ft.all {
107+
for j := ft.maxKeyLength - 1; j >= 0; j-- {
108+
if v.Key == "" {
109+
// empty key, stored specially
110+
empty = v.Val
111+
}
112+
// get the char at the position; the default (position beyond the key range) is ASCII 0
113+
var c = byte(0)
114+
if j < len(v.Key) {
115+
c = v.Key[j]
116+
}
117+
118+
if positionDispersion[j] == nil {
119+
positionDispersion[j] = make(map[byte][]int, 16)
120+
}
121+
// record the index i of the value with the same char c at the same position j
122+
positionDispersion[j][c] = append(positionDispersion[j][c], i)
123+
}
124+
}
95125

96-
for i, v := range ft.all {
97-
for j := ft.maxKeyLength - 1; j >= 0; j-- {
98-
if v.Key == "" {
99-
// empty key, stored specially
100-
empty = v.Val
126+
// calculate the best position which has the highest dispersion
127+
var idealPos = -1
128+
var min = defaultMaxBucketSize
129+
var count = len(ft.all)
130+
131+
for i := ft.maxKeyLength - 1; i >= 0; i-- {
132+
cd := positionDispersion[i]
133+
l := len(cd)
134+
// calculate the dispersion (average bucket size)
135+
f := float64(count) / float64(l)
136+
if f < min {
137+
min = f
138+
idealPos = i
101139
}
102-
// get the char at the position; the default (position beyond the key range) is ASCII 0
103-
var c = byte(0)
104-
if j < len(v.Key) {
105-
c = v.Key[j]
140+
// 1 means all the values are stored in different buckets, no need to continue calculating
141+
if min == 1 {
142+
break
106143
}
144+
}
107145

108-
if positionDispersion[j] == nil {
109-
positionDispersion[j] = make(map[byte][]int, 16)
146+
if idealPos != -1 {
147+
// find the best position, build a trie tree
148+
ft.hash = nil
149+
ft.trie = &caching.TrieTree{}
150+
// NOTICE: we only use a two-layer tree here, for better performance
151+
ft.trie.Positions = append(ft.trie.Positions, idealPos)
152+
// set all key-values to the trie tree
153+
for _, v := range ft.all {
154+
ft.trie.Set(v.Key, v.Val)
110155
}
111-
// record the index i of the value with the same char c at the same position j
112-
positionDispersion[j][c] = append(positionDispersion[j][c], i)
156+
if empty != nil {
157+
ft.trie.Empty = empty
158+
}
159+
return
113160
}
114161
}
115162

116-
// calculate the best position which has the highest dispersion
117-
var idealPos = -1
118-
var min = defaultMaxBucketSize
119-
var count = len(ft.all)
120-
121-
for i := ft.maxKeyLength - 1; i >= 0; i-- {
122-
cd := positionDispersion[i]
123-
l := len(cd)
124-
// calculate the dispersion (average bucket size)
125-
f := float64(count) / float64(l)
126-
if f < min {
127-
min = f
128-
idealPos = i
129-
}
130-
// 1 means all the values are stored in different buckets, no need to continue calculating
131-
if min == 1 {
132-
break
163+
// no ideal position or force use hash map
164+
ft.trie = nil
165+
ft.hash = make(map[string]unsafe.Pointer, len(ft.all))
166+
// set all key-values to the hash map
167+
for _, v := range ft.all {
168+
// keep the first value for a duplicate key, so check if the key exists before set
169+
// WARN: if the key exists, the value WON'T be replaced
170+
o := ft.hash[v.Key]
171+
if o == nil {
172+
ft.hash[v.Key] = v.Val
133173
}
134174
}
135-
136-
if idealPos != -1 {
137-
// find the best position, build a trie tree
138-
ft.hash = nil
139-
ft.trie = &caching.TrieTree{}
140-
// NOTICE: we only use a two-layer tree here, for better performance
141-
ft.trie.Positions = append(ft.trie.Positions, idealPos)
142-
// set all key-values to the trie tree
143-
for _, v := range ft.all {
144-
ft.trie.Set(v.Key, v.Val)
145-
}
146-
if empty != nil {
147-
ft.trie.Empty = empty
148-
}
149-
150-
} else {
151-
// no ideal position, build a hash map
152-
ft.trie = nil
153-
ft.hash = caching.NewHashMap(len(ft.all), defaultHashMapLoadFactor)
154-
// set all key-values to the hash map
155-
for _, v := range ft.all {
156-
// caching.HashMap does not support duplicate key, so must check if the key exists before set
157-
// WARN: if the key exists, the value WON'T be replaced
158-
o := ft.hash.Get(v.Key)
159-
if o == nil {
160-
ft.hash.Set(v.Key, v.Val)
161-
}
162-
}
163-
if empty != nil {
164-
ft.hash.Set("", empty)
165-
}
175+
if empty != nil {
176+
ft.hash[""] = empty
166177
}
167178
}
168179

internal/util/fieldmap_test.go

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,20 +16,42 @@
1616

1717
package util
1818

19-
import "testing"
19+
import (
20+
"testing"
21+
"unsafe"
22+
)
2023

21-
func TestEmptyFieldMap(t *testing.T) {
22-
// empty test
23-
ids := FieldIDMap{}
24-
if ids.Get(1) != nil {
25-
t.Fatalf("expect nil")
24+
func TestFieldMap(t *testing.T) {
25+
ids := NewFieldNameMap()
26+
v1 := "a"
27+
ids.Set("1", unsafe.Pointer(&v1))
28+
v2 := "b"
29+
ids.Set("2", unsafe.Pointer(&v2))
30+
ids.Set("1", unsafe.Pointer(&v2))
31+
ids.Set("", unsafe.Pointer(&v1))
32+
33+
ids.Build(false)
34+
35+
if ids.Get("1") != unsafe.Pointer(&v2) {
36+
t.Fatalf("expect 1")
2637
}
27-
names := FieldNameMap{}
28-
if names.Get("a") != nil {
29-
t.Fatalf("expect nil")
38+
if ids.Get("2") != unsafe.Pointer(&v2) {
39+
t.Fatalf("expect 1")
3040
}
31-
names.Build()
32-
if names.Get("a") != nil {
33-
t.Fatalf("expect nil")
41+
if ids.Get("") != unsafe.Pointer(&v1) {
42+
t.Fatalf("expect 1")
3443
}
44+
45+
ids = NewFieldNameMap()
46+
ids.Set("", unsafe.Pointer(&v1))
47+
ids.Set("1", unsafe.Pointer(&v2))
48+
ids.Build(true)
49+
50+
if ids.Get("") != unsafe.Pointer(&v1) {
51+
t.Fatalf("expect 1")
52+
}
53+
if ids.Get("1") != unsafe.Pointer(&v2) {
54+
t.Fatalf("expect 2")
55+
}
56+
3557
}

proto/idl.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,10 @@ type Options struct {
4848
ParseFunctionMode meta.ParseFunctionMode // not implemented.
4949

5050
EnableProtoBase bool // not implemented.
51+
52+
// ForceHashMapAsFieldNameMap indicates to use hash map as underlying field name map.
53+
// By default we try to use a trie tree as the field name map, which is usually faster than a Go map but consumes more memory.
54+
ForceHashMapAsFieldNameMap bool
5155
}
5256

5357
// NewDefaultOptions creates a default Options.
@@ -258,7 +262,7 @@ func parseMessage(ctx context.Context, msgDesc *desc.MessageDescriptor, cache co
258262
md.names.Set(name, unsafe.Pointer(fieldDesc))
259263
md.names.Set(jsonName, unsafe.Pointer(fieldDesc))
260264
}
261-
md.names.Build()
265+
md.names.Build(opts.ForceHashMapAsFieldNameMap)
262266

263267
return ty, nil
264268
}

thrift/idl.go

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,10 @@ type Options struct {
100100

101101
// ApiBodyFastPath indicates `api.body` will change alias-name of root field, which can avoid search http-body on them
102102
ApiBodyFastPath bool
103+
104+
// ForceHashMapAsFieldNameMap indicates to use hash map as underlying field name map.
105+
// By default we try to use a trie tree as the field name map, which is usually faster than a Go map but consumes more memory.
106+
ForceHashMapAsFieldNameMap bool
103107
}
104108

105109
// NewDefaultOptions creates a default Options.
@@ -479,7 +483,7 @@ func parseRequest(ctx context.Context, isStreaming bool, fn *parser.Function, tr
479483
}
480484
wrappedTyDsc.Struct().ids.Set(int32(reqAst.ID), unsafe.Pointer(reqField))
481485
wrappedTyDsc.Struct().names.Set(reqAst.Name, unsafe.Pointer(reqField))
482-
wrappedTyDsc.Struct().names.Build()
486+
wrappedTyDsc.Struct().names.Build(opts.ForceHashMapAsFieldNameMap)
483487
return wrappedTyDsc, hasRequestBase, nil
484488
}
485489

@@ -528,7 +532,7 @@ func parseResponse(ctx context.Context, isStreaming bool, fn *parser.Function, t
528532
wrappedResp.Struct().ids.Set(int32(exp.ID), unsafe.Pointer(exceptionField))
529533
wrappedResp.Struct().names.Set(exp.Name, unsafe.Pointer(exceptionField))
530534
}
531-
wrappedResp.Struct().names.Build()
535+
wrappedResp.Struct().names.Build(opts.ForceHashMapAsFieldNameMap)
532536
return wrappedResp, nil
533537
}
534538

@@ -758,7 +762,7 @@ func parseType(ctx context.Context, t *parser.Type, tree *parser.Thrift, cache c
758762

759763
}
760764
// build field name map
761-
ty.Struct().names.Build()
765+
ty.Struct().names.Build(opts.ForceHashMapAsFieldNameMap)
762766
return ty, nil
763767
}
764768
}

0 commit comments

Comments
 (0)