@@ -35,10 +35,19 @@ type FieldNameMap struct {
3535 maxKeyLength int
3636 all []caching.Pair
3737 trie * caching.TrieTree
38- hash * caching. HashMap
38+ hash map [ string ]unsafe. Pointer
3939}
4040
41- // Set sets the field descriptor for the given key
41+ func NewFieldNameMap () * FieldNameMap {
42+ return & FieldNameMap {
43+ hash : make (map [string ]unsafe.Pointer , defaultMapSize ),
44+ }
45+ }
46+
47+ // Set sets the field descriptor for the given key.
48+ //
49+ // NOTICE: It is set to the hash map by default. If the user wants to use a trie tree,
50+ // please call Build(false) after all Set() calls.
4251func (ft * FieldNameMap ) Set (key string , field unsafe.Pointer ) (exist bool ) {
4352 if len (key ) > ft .maxKeyLength {
4453 ft .maxKeyLength = len (key )
@@ -59,7 +68,7 @@ func (ft FieldNameMap) Get(k string) unsafe.Pointer {
5968 if ft .trie != nil {
6069 return (unsafe .Pointer )(ft .trie .Get (k ))
6170 } else if ft .hash != nil {
62- return (unsafe .Pointer )(ft .hash . Get ( k ) )
71+ return (unsafe .Pointer )(ft .hash [ k ] )
6372 }
6473 return nil
6574}
@@ -72,7 +81,7 @@ func (ft FieldNameMap) All() []caching.Pair {
7281// Size returns the size of the map
7382func (ft FieldNameMap ) Size () int {
7483 if ft .hash != nil {
75- return ft .hash . Size ( )
84+ return len ( ft .hash )
7685 } else if ft .trie != nil {
7786 return ft .trie .Size ()
7887 }
@@ -81,88 +90,90 @@ func (ft FieldNameMap) Size() int {
8190
8291// Build builds the map.
8392// It will try to build a trie tree if the dispersion of keys is high enough (min).
84- func (ft * FieldNameMap ) Build () {
93+ func (ft * FieldNameMap ) Build (noTrieTree bool ) {
8594 if len (ft .all ) == 0 {
8695 return
8796 }
8897
8998 var empty unsafe.Pointer
9099
91- // statistics the distrubution for each position:
92- // - primary slice store the position as its index
93- // - secondary map used to merge values with same char at the same position
94- var positionDispersion = make ([]map [byte ][]int , ft .maxKeyLength )
100+ if ! noTrieTree {
101+ // collect statistics on the distribution for each position:
102+ // - the primary slice stores the position as its index
103+ // - the secondary map is used to merge values with the same char at the same position
104+ var positionDispersion = make ([]map [byte ][]int , ft .maxKeyLength )
105+
106+ for i , v := range ft .all {
107+ for j := ft .maxKeyLength - 1 ; j >= 0 ; j -- {
108+ if v .Key == "" {
109+ // empty key, stored specially
110+ empty = v .Val
111+ }
112+ // get the char at the position, default (position beyond the key range) is ASCII 0
113+ var c = byte (0 )
114+ if j < len (v .Key ) {
115+ c = v .Key [j ]
116+ }
117+
118+ if positionDispersion [j ] == nil {
119+ positionDispersion [j ] = make (map [byte ][]int , 16 )
120+ }
121+ // record the index i of the value with the same char c at the same position j
122+ positionDispersion [j ][c ] = append (positionDispersion [j ][c ], i )
123+ }
124+ }
95125
96- for i , v := range ft .all {
97- for j := ft .maxKeyLength - 1 ; j >= 0 ; j -- {
98- if v .Key == "" {
99- // empty key, especially store
100- empty = v .Val
126+ // calculate the best position which has the highest dispersion
127+ var idealPos = - 1
128+ var min = defaultMaxBucketSize
129+ var count = len (ft .all )
130+
131+ for i := ft .maxKeyLength - 1 ; i >= 0 ; i -- {
132+ cd := positionDispersion [i ]
133+ l := len (cd )
134+ // calculate the dispersion (average bucket size)
135+ f := float64 (count ) / float64 (l )
136+ if f < min {
137+ min = f
138+ idealPos = i
101139 }
102- // get the char at the position, defualt (position beyonds key range) is ASCII 0
103- var c = byte (0 )
104- if j < len (v .Key ) {
105- c = v .Key [j ]
140+ // 1 means all the values are stored in different buckets, no need to continue calculating
141+ if min == 1 {
142+ break
106143 }
144+ }
107145
108- if positionDispersion [j ] == nil {
109- positionDispersion [j ] = make (map [byte ][]int , 16 )
146+ if idealPos != - 1 {
147+ // find the best position, build a trie tree
148+ ft .hash = nil
149+ ft .trie = & caching.TrieTree {}
150+ // NOTICE: we only use a two-layer tree here, for better performance
151+ ft .trie .Positions = append (ft .trie .Positions , idealPos )
152+ // set all key-values to the trie tree
153+ for _ , v := range ft .all {
154+ ft .trie .Set (v .Key , v .Val )
110155 }
111- // recoder the index i of the value with same char c at the same position j
112- positionDispersion [j ][c ] = append (positionDispersion [j ][c ], i )
156+ if empty != nil {
157+ ft .trie .Empty = empty
158+ }
159+ return
113160 }
114161 }
115162
116- // calculate the best position which has the highest dispersion
117- var idealPos = - 1
118- var min = defaultMaxBucketSize
119- var count = len (ft .all )
120-
121- for i := ft .maxKeyLength - 1 ; i >= 0 ; i -- {
122- cd := positionDispersion [i ]
123- l := len (cd )
124- // calculate the dispersion (average bucket size)
125- f := float64 (count ) / float64 (l )
126- if f < min {
127- min = f
128- idealPos = i
129- }
130- // 1 means all the value store in different bucket, no need to continue calulating
131- if min == 1 {
132- break
163+ // no ideal position or force use hash map
164+ ft .trie = nil
165+ ft .hash = make (map [string ]unsafe.Pointer , len (ft .all ))
166+ // set all key-values to the hash map
167+ for _ , v := range ft .all {
168+ // duplicate keys keep the first non-nil value, so must check if the key exists before set
169+ // WARN: if the key exists, the value WON'T be replaced
170+ o := ft .hash [v .Key ]
171+ if o == nil {
172+ ft .hash [v .Key ] = v .Val
133173 }
134174 }
135-
136- if idealPos != - 1 {
137- // find the best position, build a trie tree
138- ft .hash = nil
139- ft .trie = & caching.TrieTree {}
140- // NOTICE: we only use a two-layer tree here, for better performance
141- ft .trie .Positions = append (ft .trie .Positions , idealPos )
142- // set all key-values to the trie tree
143- for _ , v := range ft .all {
144- ft .trie .Set (v .Key , v .Val )
145- }
146- if empty != nil {
147- ft .trie .Empty = empty
148- }
149-
150- } else {
151- // no ideal position, build a hash map
152- ft .trie = nil
153- ft .hash = caching .NewHashMap (len (ft .all ), defaultHashMapLoadFactor )
154- // set all key-values to the trie tree
155- for _ , v := range ft .all {
156- // caching.HashMap does not support duplicate key, so must check if the key exists before set
157- // WARN: if the key exists, the value WON'T be replaced
158- o := ft .hash .Get (v .Key )
159- if o == nil {
160- ft .hash .Set (v .Key , v .Val )
161- }
162- }
163- if empty != nil {
164- ft .hash .Set ("" , empty )
165- }
175+ if empty != nil {
176+ ft .hash ["" ] = empty
166177 }
167178}
168179
0 commit comments