Skip to content

Commit 6d422a6

Browse files
authored
Merge pull request #30 from Taimoor-12/type-sizes
added type sizes info found within the data section
2 parents 20ef154 + 3608540 commit 6d422a6

File tree

3 files changed

+261
-29
lines changed

3 files changed

+261
-29
lines changed

cmd_metadata.go

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,12 @@ var predictMetadataFmts = []string{"pretty", "json"}
1313

1414
var completionsMetadata = &complete.Command{
1515
Flags: map[string]complete.Predictor{
16-
"--nocolor": predict.Nothing,
17-
"-h": predict.Nothing,
18-
"--help": predict.Nothing,
19-
"-f": predict.Set(predictMetadataFmts),
20-
"--format": predict.Set(predictMetadataFmts),
16+
"--nocolor": predict.Nothing,
17+
"--data-types": predict.Nothing,
18+
"-h": predict.Nothing,
19+
"--help": predict.Nothing,
20+
"-f": predict.Set(predictMetadataFmts),
21+
"--format": predict.Set(predictMetadataFmts),
2122
},
2223
}
2324

@@ -29,6 +30,8 @@ Options:
2930
General:
3031
--nocolor
3132
disable colored output.
33+
--data-types
34+
show data type sizes within the data section.
3235
--help, -h
3336
show help.
3437

lib/cmd_metadata.go

Lines changed: 60 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,10 @@ const (
1818

1919
// CmdMetadataFlags are flags expected by CmdMetadata.
2020
type CmdMetadataFlags struct {
21-
Help bool
22-
NoColor bool
23-
Format string
21+
Help bool
22+
NoColor bool
23+
Format string
24+
DataTypes bool
2425
}
2526

2627
// Init initializes the common flags available to CmdMetadata with sensible
@@ -44,6 +45,11 @@ func (f *CmdMetadataFlags) Init() {
4445
"format", "f", "",
4546
_h,
4647
)
48+
pflag.BoolVar(
49+
&f.DataTypes,
50+
"data-types", false,
51+
"show data type sizes within the data section.",
52+
)
4753
}
4854

4955
func CmdMetadata(f CmdMetadataFlags, args []string, printHelp func()) error {
@@ -71,7 +77,8 @@ func CmdMetadata(f CmdMetadataFlags, args []string, printHelp func()) error {
7177
}
7278

7379
// open tree.
74-
db, err := maxminddb.Open(args[0])
80+
mmdbFile := args[0]
81+
db, err := maxminddb.Open(mmdbFile)
7582
if err != nil {
7683
return fmt.Errorf("couldn't open mmdb file: %w", err)
7784
}
@@ -86,7 +93,7 @@ func CmdMetadata(f CmdMetadataFlags, args []string, printHelp func()) error {
8693
metadataSectionStartOffset := 0
8794

8895
// Offset of this separator is used to determine the metadata start section, data section end and data section size.
89-
offset, err := findSectionSeparator(args[0], MetadataStartMarker)
96+
offset, err := findSectionSeparator(mmdbFile, MetadataStartMarker)
9097
if err != nil {
9198
return fmt.Errorf("couldn't process the mmdb file: %w", err)
9299
}
@@ -96,32 +103,57 @@ func CmdMetadata(f CmdMetadataFlags, args []string, printHelp func()) error {
96103
}
97104
dataSectionEndOffset = int(offset)
98105
dataSectionSize = int(offset) - treeSize - 16
106+
var typeSizes TypeSizes
107+
if f.DataTypes {
108+
typeSizes, err = traverseDataSection(mmdbFile, int64(dataSectionStartOffset), int64(dataSectionEndOffset))
109+
if err != nil {
110+
return fmt.Errorf("couldn't process the mmdb file: %w", err)
111+
}
112+
}
99113
metadataSectionStartOffset = int(offset) + len(MetadataStartMarker)
100114

101115
if f.Format == "pretty" {
102116
fmtEntry := color.New(color.FgCyan)
103117
fmtVal := color.New(color.FgGreen)
104-
printlineGen := func(entryLen string) func(string, string) {
105-
return func(name string, val string) {
118+
printlineGen := func(indentSpace, entryLen string) func(string, string, string) {
119+
return func(name string, val string, valSimplified string) {
106120
fmt.Printf(
107-
"- %v %v\n",
121+
"%v- %v %v %v\n",
122+
indentSpace,
108123
fmtEntry.Sprintf("%-"+entryLen+"s", name),
109124
fmtVal.Sprintf("%v", val),
125+
fmtVal.Sprintf("%v", valSimplified),
110126
)
111127
}
112128
}
113-
printline := printlineGen("13")
114-
printline("Binary Format", binaryFmt)
115-
printline("Database Type", mdFromLib.DatabaseType)
116-
printline("IP Version", strconv.Itoa(int(mdFromLib.IPVersion)))
117-
printline("Record Size", strconv.Itoa(int(mdFromLib.RecordSize)))
118-
printline("Node Count", strconv.Itoa(int(mdFromLib.NodeCount)))
119-
printline("Tree Size", strconv.Itoa(treeSize))
120-
printline("Data Section Size", strconv.Itoa(dataSectionSize))
121-
printline("Data Section Start Offset", strconv.Itoa(dataSectionStartOffset))
122-
printline("Data Section End Offset", strconv.Itoa(dataSectionEndOffset))
123-
printline("Metadata Section Start Offset", strconv.Itoa(metadataSectionStartOffset))
124-
printline("Description", "")
129+
130+
printline := printlineGen("", "13")
131+
printline("Binary Format", binaryFmt, "")
132+
printline("Database Type", mdFromLib.DatabaseType, "")
133+
printline("IP Version", strconv.Itoa(int(mdFromLib.IPVersion)), "")
134+
printline("Record Size", strconv.Itoa(int(mdFromLib.RecordSize)), simplifySize(int64(mdFromLib.RecordSize)))
135+
printline("Node Count", strconv.Itoa(int(mdFromLib.NodeCount)), simplifySize(int64(mdFromLib.NodeCount)))
136+
printline("Tree Size", strconv.Itoa(treeSize), simplifySize(int64(treeSize)))
137+
printline("Data Section Size", strconv.Itoa(dataSectionSize), simplifySize(int64(dataSectionSize)))
138+
if f.DataTypes {
139+
typeSizePrintline := printlineGen(" ", "13")
140+
typeSizePrintline("Pointer Size", strconv.Itoa(int(typeSizes.PointerSize)), simplifySize(typeSizes.PointerSize))
141+
typeSizePrintline("UTF-8 String Size", strconv.Itoa(int(typeSizes.Utf8StringSize)), simplifySize(typeSizes.Utf8StringSize))
142+
typeSizePrintline("Double Size", strconv.Itoa(int(typeSizes.DoubleSize)), simplifySize(typeSizes.DoubleSize))
143+
typeSizePrintline("Bytes Size", strconv.Itoa(int(typeSizes.BytesSize)), simplifySize(typeSizes.BytesSize))
144+
typeSizePrintline("Unsigned 16-bit Integer Size", strconv.Itoa(int(typeSizes.Unsigned16bitIntSize)), simplifySize(typeSizes.Unsigned16bitIntSize))
145+
typeSizePrintline("Unsigned 32-bit Integer Size", strconv.Itoa(int(typeSizes.Unsigned32bitIntSize)), simplifySize(typeSizes.Unsigned32bitIntSize))
146+
typeSizePrintline("Signed 32-bit Integer Size", strconv.Itoa(int(typeSizes.Signed32bitIntSize)), simplifySize(typeSizes.Signed32bitIntSize))
147+
typeSizePrintline("Unsigned 64-bit Integer Size", strconv.Itoa(int(typeSizes.Unsigned64bitIntSize)), simplifySize(typeSizes.Unsigned64bitIntSize))
148+
typeSizePrintline("Unsigned 128-bit Integer Size", strconv.Itoa(int(typeSizes.Unsigned128bitIntSize)), simplifySize(typeSizes.Unsigned128bitIntSize))
149+
typeSizePrintline("Map Key-Value Pair Count", strconv.Itoa(int(typeSizes.MapKeyValueCount)), simplifySize(typeSizes.MapKeyValueCount))
150+
typeSizePrintline("Array Length", strconv.Itoa(int(typeSizes.ArrayLength)), simplifySize(typeSizes.ArrayLength))
151+
typeSizePrintline("Float Size", strconv.Itoa(int(typeSizes.FloatSize)), simplifySize(typeSizes.FloatSize))
152+
}
153+
printline("Data Section Start Offset", strconv.Itoa(dataSectionStartOffset), simplifySize(int64(dataSectionStartOffset)))
154+
printline("Data Section End Offset", strconv.Itoa(dataSectionEndOffset), simplifySize(int64(dataSectionEndOffset)))
155+
printline("Metadata Section Start Offset", strconv.Itoa(metadataSectionStartOffset), simplifySize(int64(metadataSectionStartOffset)))
156+
printline("Description", "", "")
125157
descKeys, descVals := sortedMapKeysAndVals(mdFromLib.Description)
126158
longestDescKeyLen := strconv.Itoa(len(longestStrInStringSlice(descKeys)))
127159
for i := 0; i < len(descKeys); i++ {
@@ -131,9 +163,13 @@ func CmdMetadata(f CmdMetadataFlags, args []string, printHelp func()) error {
131163
fmtVal.Sprintf("%v", descVals[i]),
132164
)
133165
}
134-
printline("Languages", strings.Join(mdFromLib.Languages, ", "))
135-
printline("Build Epoch", strconv.Itoa(int(mdFromLib.BuildEpoch)))
166+
printline("Languages", strings.Join(mdFromLib.Languages, ", "), "")
167+
printline("Build Epoch", strconv.Itoa(int(mdFromLib.BuildEpoch)), "")
136168
} else { // json
169+
var typeSizesPtr *TypeSizes
170+
if f.DataTypes {
171+
typeSizesPtr = &typeSizes
172+
}
137173
md := struct {
138174
BinaryFormatVsn string `json:"binary_format"`
139175
DatabaseType string `json:"db_type"`
@@ -142,6 +178,7 @@ func CmdMetadata(f CmdMetadataFlags, args []string, printHelp func()) error {
142178
NodeCount uint `json:"node_count"`
143179
TreeSize uint `json:"tree_size"`
144180
DataSectionSize uint `json:"data_section_size"`
181+
TypeSize *TypeSizes `json:"data_type_sizes,omitempty"`
145182
DataSectionStartOffset uint `json:"data_section_start_offset"`
146183
DataSectionEndOffset uint `json:"data_section_end_offset"`
147184
MetadataStartOffset uint `json:"metadata_section_start_offset"`
@@ -156,6 +193,7 @@ func CmdMetadata(f CmdMetadataFlags, args []string, printHelp func()) error {
156193
mdFromLib.NodeCount,
157194
uint(treeSize),
158195
uint(dataSectionSize),
196+
typeSizesPtr,
159197
uint(dataSectionStartOffset),
160198
uint(dataSectionEndOffset),
161199
uint(metadataSectionStartOffset),

lib/utils.go

Lines changed: 193 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"bytes"
55
"encoding/json"
66
"fmt"
7+
"io"
78
"os"
89
"sort"
910
"strconv"
@@ -67,10 +68,10 @@ func mapInterfaceToStr(m map[string]interface{}) map[string]string {
6768
return retVal
6869
}
6970

70-
func findSectionSeparator(mmdbFile, sep string) (int64, error) {
71+
func findSectionSeparator(mmdbFile string, sep string) (int64, error) {
7172
file, err := os.Open(mmdbFile)
7273
if err != nil {
73-
return 0, err
74+
return 0, fmt.Errorf("couldn't open mmdb file: %w", err)
7475
}
7576
defer file.Close()
7677

@@ -96,3 +97,193 @@ func findSectionSeparator(mmdbFile, sep string) (int64, error) {
9697

9798
return -1, nil
9899
}
100+
101+
// simplifySize renders size as a parenthesised, human-readable quantity using
// binary multiples (1 KB = 1024), e.g. "(1.50 KB)". The largest unit that does
// not exceed size is chosen. Values below 1 KB, including negative ones,
// produce an empty string so that callers can print nothing next to them.
func simplifySize(size int64) string {
	// Ordered from largest to smallest so the first match is the best fit.
	scales := [...]struct {
		threshold int64
		label     string
	}{
		{1 << 40, "TB"},
		{1 << 30, "GB"},
		{1 << 20, "MB"},
		{1 << 10, "KB"},
	}

	for _, scale := range scales {
		if size < scale.threshold {
			continue
		}
		scaled := float64(size) / float64(scale.threshold)
		return fmt.Sprintf("(%.2f %s)", scaled, scale.label)
	}

	return ""
}
123+
124+
// TypeSizes holds the per-data-type totals gathered by traverseDataSection
// while walking the data section of an mmdb file. Field order is significant:
// it determines the key order of the JSON output.
type TypeSizes struct {
	// PointerSize is the running total recorded for pointer fields.
	PointerSize int64 `json:"pointer_size"`
	// Utf8StringSize is the summed payload size of UTF-8 string fields.
	Utf8StringSize int64 `json:"utf8_string_size"`
	// DoubleSize is the total recorded for double fields (8 per field).
	DoubleSize int64 `json:"double_size"`
	// BytesSize is the summed payload size of raw byte fields.
	BytesSize int64 `json:"bytes_size"`
	// Unsigned16bitIntSize is the summed payload size of uint16 fields.
	Unsigned16bitIntSize int64 `json:"unsigned_16bit_int_size"`
	// Unsigned32bitIntSize is the summed payload size of uint32 fields.
	Unsigned32bitIntSize int64 `json:"unsigned_32bit_int_size"`
	// Signed32bitIntSize is the summed payload size of int32 fields.
	Signed32bitIntSize int64 `json:"signed_32bit_int_size"`
	// Unsigned64bitIntSize is the summed payload size of uint64 fields.
	Unsigned64bitIntSize int64 `json:"unsigned_64bit_int_size"`
	// Unsigned128bitIntSize is the summed payload size of uint128 fields.
	Unsigned128bitIntSize int64 `json:"unsigned_128bit_int_size"`
	// MapKeyValueCount is the summed entry count of all maps (a count, not bytes).
	MapKeyValueCount int64 `json:"map_key_value_pair_count"`
	// ArrayLength is the summed element count of all arrays (a count, not bytes).
	ArrayLength int64 `json:"array_length"`
	// FloatSize is the total recorded for float fields (4 per field).
	FloatSize int64 `json:"float_size"`
}
138+
139+
func traverseDataSection(mmdbFile string, startOffset int64, endOffset int64) (TypeSizes, error) {
140+
file, err := os.Open(mmdbFile)
141+
if err != nil {
142+
return TypeSizes{}, fmt.Errorf("couldn't open mmdb file: %w", err)
143+
}
144+
defer file.Close()
145+
146+
// Go to the start offset of the data section.
147+
_, err = file.Seek(startOffset, 0)
148+
if err != nil {
149+
return TypeSizes{}, err
150+
}
151+
152+
var typeSizes TypeSizes
153+
154+
// Read and process bytes until the end offset is reached.
155+
for offset := startOffset; offset < endOffset; {
156+
var controlByte [1]byte
157+
_, err := file.Read(controlByte[:])
158+
if err != nil {
159+
return TypeSizes{}, err
160+
}
161+
offset++
162+
163+
// Extract the type from the control byte.
164+
dataType := (controlByte[0] >> 5) & 0b00000111 // Most significant 3 bits represent the type.
165+
// Extract the payload size from the control byte.
166+
payloadSize := int(controlByte[0] & 0b00011111) // Least significant 5 bits represent payload size.
167+
// Check if it's an extended type.
168+
if dataType == 0 {
169+
// Read actual type number from the next byte
170+
var extendedTypeByte [1]byte
171+
_, err := file.Read(extendedTypeByte[:])
172+
if err != nil {
173+
return TypeSizes{}, fmt.Errorf("couldn't read the file: %v", err)
174+
}
175+
offset++
176+
177+
switch extendedTypeByte[0] {
178+
case 1: // unsigned 32-bit int.
179+
payloadSize, offset, err = payloadCalculation(file, payloadSize, offset)
180+
if err != nil {
181+
return TypeSizes{}, fmt.Errorf("couldn't read the file: %v", err)
182+
}
183+
typeSizes.Signed32bitIntSize += int64(payloadSize)
184+
case 2: // unsigned 64-bit int.
185+
payloadSize, offset, err = payloadCalculation(file, payloadSize, offset)
186+
if err != nil {
187+
return TypeSizes{}, fmt.Errorf("couldn't read the file: %v", err)
188+
}
189+
typeSizes.Unsigned64bitIntSize += int64(payloadSize)
190+
case 3: // unsigned 128-bit int.
191+
payloadSize, offset, err = payloadCalculation(file, payloadSize, offset)
192+
if err != nil {
193+
return TypeSizes{}, fmt.Errorf("couldn't read the file: %v", err)
194+
}
195+
typeSizes.Unsigned128bitIntSize += int64(payloadSize)
196+
case 4: // array.
197+
payloadSize, offset, err = payloadCalculation(file, payloadSize, offset)
198+
if err != nil {
199+
return TypeSizes{}, fmt.Errorf("couldn't read the file: %v", err)
200+
}
201+
typeSizes.ArrayLength += int64(payloadSize)
202+
case 8: // float.
203+
typeSizes.FloatSize += 4
204+
}
205+
} else {
206+
// Process based on the data type.
207+
switch dataType {
208+
case 1: // Pointer.
209+
size := int((controlByte[0] >> 3) & 0b00000011) // Extract the size bits at position 3 and 4.
210+
switch size {
211+
case 1:
212+
typeSizes.PointerSize += 1
213+
case 2:
214+
typeSizes.PointerSize += 2
215+
case 3:
216+
typeSizes.PointerSize += 3
217+
}
218+
case 2: // UTF-8 string.
219+
payloadSize, offset, err = payloadCalculation(file, payloadSize, offset)
220+
if err != nil {
221+
return TypeSizes{}, fmt.Errorf("couldn't read the file: %v", err)
222+
}
223+
typeSizes.Utf8StringSize += int64(payloadSize)
224+
case 3: // Double.
225+
typeSizes.DoubleSize += 8
226+
case 4: // Byte.
227+
payloadSize, offset, err = payloadCalculation(file, payloadSize, offset)
228+
if err != nil {
229+
return TypeSizes{}, fmt.Errorf("couldn't read the file: %v", err)
230+
}
231+
typeSizes.BytesSize += int64(payloadSize)
232+
case 5: // unsigned 16-bit int.
233+
payloadSize, offset, err = payloadCalculation(file, payloadSize, offset)
234+
if err != nil {
235+
return TypeSizes{}, fmt.Errorf("couldn't read the file: %v", err)
236+
}
237+
typeSizes.Unsigned16bitIntSize += int64(payloadSize)
238+
case 6: // unsigned 32-bit int.
239+
payloadSize, offset, err = payloadCalculation(file, payloadSize, offset)
240+
if err != nil {
241+
return TypeSizes{}, fmt.Errorf("couldn't read the file: %v", err)
242+
}
243+
typeSizes.Unsigned32bitIntSize += int64(payloadSize)
244+
case 7: // map.
245+
payloadSize, offset, err = payloadCalculation(file, payloadSize, offset)
246+
if err != nil {
247+
return TypeSizes{}, fmt.Errorf("couldn't read the file: %v", err)
248+
}
249+
typeSizes.MapKeyValueCount += int64(payloadSize)
250+
}
251+
}
252+
}
253+
254+
return typeSizes, nil
255+
}
256+
257+
// payloadCalculation resolves the final payload size of a data field whose
// control byte carried the 5-bit size value payloadSize.
//
// Values below 29 are the size itself and are returned unchanged, together
// with the unchanged offset and without reading from mmdbFile. The values 29,
// 30 and 31 mean the size continues in the next 1, 2 or 3 bytes respectively;
// those bytes are read from mmdbFile as a big-endian number and added to a
// fixed base (29, 285 or 65821). The returned offset is advanced by the
// number of extension bytes consumed.
//
// On a read failure, including a stream that ends before all extension bytes
// are available, it returns -1, -1 and the error.
func payloadCalculation(mmdbFile io.Reader, payloadSize int, offset int64) (int, int64, error) {
	var extraBytes, base int
	switch payloadSize {
	case 29:
		extraBytes, base = 1, 29
	case 30:
		extraBytes, base = 2, 285
	case 31:
		extraBytes, base = 3, 65821
	default:
		return payloadSize, offset, nil
	}

	// io.ReadFull is required here: a bare Read may legally return fewer
	// bytes than requested with a nil error, which would silently yield a
	// wrong size.
	var buf [3]byte
	ext := buf[:extraBytes]
	if _, err := io.ReadFull(mmdbFile, ext); err != nil {
		return -1, -1, err
	}

	value := 0
	for _, b := range ext {
		value = value<<8 | int(b)
	}

	return base + value, offset + int64(extraBytes), nil
}

0 commit comments

Comments
 (0)