Skip to content

Commit 75349bf

Browse files
authored
Merge pull request #204 from xitongsys/dev
Dev
2 parents bd0ac08 + 818e1c3 commit 75349bf

File tree

5 files changed

+133
-31
lines changed

5 files changed

+133
-31
lines changed

README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,12 @@ There are two types in Parquet: Primitive Type and Logical Type. Logical types a
3333
|BYTE_ARRAY|BYTE_ARRAY|string|
3434
|FIXED_LEN_BYTE_ARRAY|FIXED_LEN_BYTE_ARRAY|string|
3535
|UTF8|BYTE_ARRAY|string|
36-
|INT_8|INT32|int32|
37-
|INT_16|INT32|int32|
36+
|INT_8|INT32|int8|
37+
|INT_16|INT32|int16|
3838
|INT_32|INT32|int32|
3939
|INT_64|INT64|int64|
40-
|UINT_8|INT32|uint32|
41-
|UINT_16|INT32|uint32|
40+
|UINT_8|INT32|uint8|
41+
|UINT_16|INT32|uint16|
4242
|UINT_32|INT32|uint32|
4343
|UINT_64|INT64|uint64|
4444
|DATE|INT32|int32|

example/column_read.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@ func main() {
3434
log.Println("Can't create parquet writer")
3535
return
3636
}
37-
num := 10
38-
for i := 0; i < num; i++ {
37+
num := int64(10)
38+
for i := 0; int64(i) < num; i++ {
3939
stu := Student{
4040
Name: "StudentName",
4141
Age: int32(20 + i%5),
@@ -70,7 +70,7 @@ func main() {
7070
log.Println("Can't create column reader", err)
7171
return
7272
}
73-
num = int(pr.GetNumRows())
73+
num = int64(pr.GetNumRows())
7474

7575
pr.SkipRowsByPath("parquet_go_root.name", 5) //skip the first five rows
7676
names, rls, dls, err = pr.ReadColumnByPath("parquet_go_root.name", num)

example/type.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,12 @@ type TypeList struct {
2020
FixedLenByteArray string `parquet:"name=FixedLenByteArray, type=FIXED_LEN_BYTE_ARRAY, length=10"`
2121

2222
Utf8 string `parquet:"name=utf8, type=UTF8, encoding=PLAIN_DICTIONARY"`
23-
Int_8 int32 `parquet:"name=int_8, type=INT_8"`
24-
Int_16 int32 `parquet:"name=int_16, type=INT_16"`
23+
Int_8 int8 `parquet:"name=int_8, type=INT_8"`
24+
Int_16 int16 `parquet:"name=int_16, type=INT_16"`
2525
Int_32 int32 `parquet:"name=int_32, type=INT_32"`
2626
Int_64 int64 `parquet:"name=int_64, type=INT_64"`
27-
Uint_8 uint32 `parquet:"name=uint_8, type=UINT_8"`
28-
Uint_16 uint32 `parquet:"name=uint_16, type=UINT_16"`
27+
Uint_8 uint8 `parquet:"name=uint_8, type=UINT_8"`
28+
Uint_16 uint16 `parquet:"name=uint_16, type=UINT_16"`
2929
Uint_32 uint32 `parquet:"name=uint_32, type=UINT_32"`
3030
Uint_64 uint64 `parquet:"name=uint_64, type=UINT_64"`
3131
Date int32 `parquet:"name=date, type=DATE"`
@@ -71,12 +71,12 @@ func main() {
7171
FixedLenByteArray: "HelloWorld",
7272

7373
Utf8: "utf8",
74-
Int_8: int32(i),
75-
Int_16: int32(i),
74+
Int_8: int8(i),
75+
Int_16: int16(i),
7676
Int_32: int32(i),
7777
Int_64: int64(i),
78-
Uint_8: uint32(i),
79-
Uint_16: uint32(i),
78+
Uint_8: uint8(i),
79+
Uint_16: uint16(i),
8080
Uint_32: uint32(i),
8181
Uint_64: uint64(i),
8282
Date: int32(i),

schema/gettype.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,10 @@ func (self *SchemaHandler) GetTypes() []reflect.Type {
4141

4242
if nc == 0 {
4343
if *rT != parquet.FieldRepetitionType_REPEATED {
44-
elementTypes[idx] = types.ParquetTypeToGoReflectType(pT, rT)
44+
elementTypes[idx] = types.ParquetTypeToGoReflectType(pT, cT, rT)
4545

4646
} else {
47-
elementTypes[idx] = reflect.SliceOf(types.ParquetTypeToGoReflectType(pT, nil))
47+
elementTypes[idx] = reflect.SliceOf(types.ParquetTypeToGoReflectType(pT, cT, nil))
4848
}
4949

5050
} else {

types/types.go

Lines changed: 116 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -69,14 +69,44 @@ func TypeNameToParquetType(name string, baseName string) (*parquet.Type, *parque
6969
panic(fmt.Errorf("Unknown data type: '%s'", name))
7070
}
7171

72-
func ParquetTypeToGoReflectType(pT *parquet.Type, rT *parquet.FieldRepetitionType) reflect.Type {
72+
func ParquetTypeToGoReflectType(pT *parquet.Type, cT *parquet.ConvertedType, rT *parquet.FieldRepetitionType) reflect.Type {
7373
if rT == nil || *rT != parquet.FieldRepetitionType_OPTIONAL {
7474
if *pT == parquet.Type_BOOLEAN {
7575
return reflect.TypeOf(true)
7676

77+
} else if *pT == parquet.Type_INT32 && cT == nil {
78+
return reflect.TypeOf(int32(0))
79+
80+
} else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_INT_8 {
81+
return reflect.TypeOf(int8(0))
82+
83+
}else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_INT_16 {
84+
return reflect.TypeOf(int16(0))
85+
86+
}else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_INT_32 {
87+
return reflect.TypeOf(int32(0))
88+
89+
}else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_UINT_8 {
90+
return reflect.TypeOf(uint8(0))
91+
92+
}else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_UINT_16 {
93+
return reflect.TypeOf(uint16(0))
94+
95+
}else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_UINT_32 {
96+
return reflect.TypeOf(uint32(0))
97+
7798
} else if *pT == parquet.Type_INT32 {
7899
return reflect.TypeOf(int32(0))
79100

101+
}else if *pT == parquet.Type_INT64 && cT == nil {
102+
return reflect.TypeOf(int64(0))
103+
104+
}else if *pT == parquet.Type_INT64 && *cT == parquet.ConvertedType_INT_64 {
105+
return reflect.TypeOf(int64(0))
106+
107+
}else if *pT == parquet.Type_INT64 && *cT == parquet.ConvertedType_UINT_64 {
108+
return reflect.TypeOf(uint64(0))
109+
80110
} else if *pT == parquet.Type_INT64 {
81111
return reflect.TypeOf(int64(0))
82112

@@ -104,10 +134,50 @@ func ParquetTypeToGoReflectType(pT *parquet.Type, rT *parquet.FieldRepetitionTyp
104134
v := true
105135
return reflect.TypeOf(&v)
106136

137+
} else if *pT == parquet.Type_INT32 && cT == nil{
138+
v := int32(0)
139+
return reflect.TypeOf(&v)
140+
141+
} else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_INT_8 {
142+
v := int8(0)
143+
return reflect.TypeOf(&v)
144+
145+
} else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_INT_16 {
146+
v := int16(0)
147+
return reflect.TypeOf(&v)
148+
149+
} else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_INT_32 {
150+
v := int32(0)
151+
return reflect.TypeOf(&v)
152+
153+
} else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_UINT_8 {
154+
v := uint8(0)
155+
return reflect.TypeOf(&v)
156+
157+
} else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_UINT_16 {
158+
v := uint16(0)
159+
return reflect.TypeOf(&v)
160+
161+
} else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_UINT_32 {
162+
v := uint32(0)
163+
return reflect.TypeOf(&v)
164+
107165
} else if *pT == parquet.Type_INT32 {
108166
v := int32(0)
109167
return reflect.TypeOf(&v)
110168

169+
} else if *pT == parquet.Type_INT64 && cT == nil {
170+
v := int64(0)
171+
return reflect.TypeOf(&v)
172+
173+
} else if *pT == parquet.Type_INT64 && *cT == parquet.ConvertedType_INT_64 {
174+
v := int64(0)
175+
return reflect.TypeOf(&v)
176+
177+
} else if *pT == parquet.Type_INT64 && *cT == parquet.ConvertedType_UINT_64 {
178+
v := uint64(0)
179+
return reflect.TypeOf(&v)
180+
111181
} else if *pT == parquet.Type_INT64 {
112182
v := int64(0)
113183
return reflect.TypeOf(&v)
@@ -146,10 +216,14 @@ func ParquetTypeToGoType(src interface{}, pT *parquet.Type, cT *parquet.Converte
146216
return src
147217
}
148218

149-
if *cT == parquet.ConvertedType_UINT_8 {
150-
return uint32(src.(int32))
219+
if *cT == parquet.ConvertedType_INT_8 {
220+
return int8(src.(int32))
221+
} else if *cT == parquet.ConvertedType_INT_16 {
222+
return int16(src.(int32))
223+
} else if *cT == parquet.ConvertedType_UINT_8 {
224+
return uint8(src.(int32))
151225
} else if *cT == parquet.ConvertedType_UINT_16 {
152-
return uint32(src.(int32))
226+
return uint16(src.(int32))
153227
} else if *cT == parquet.ConvertedType_UINT_32 {
154228
return uint32(src.(int32))
155229
} else if *cT == parquet.ConvertedType_UINT_64 {
@@ -164,10 +238,14 @@ func GoTypeToParquetType(src interface{}, pT *parquet.Type, cT *parquet.Converte
164238
return src
165239
}
166240

167-
if *cT == parquet.ConvertedType_UINT_8 {
168-
return int32(src.(uint32))
241+
if *cT == parquet.ConvertedType_INT_8 {
242+
return int32(src.(int8))
243+
}else if *cT == parquet.ConvertedType_INT_16 {
244+
return int32(src.(int16))
245+
} else if *cT == parquet.ConvertedType_UINT_8 {
246+
return int32(src.(uint8))
169247
} else if *cT == parquet.ConvertedType_UINT_16 {
170-
return int32(src.(uint32))
248+
return int32(src.(uint16))
171249
} else if *cT == parquet.ConvertedType_UINT_32 {
172250
return int32(src.(uint32))
173251
} else if *cT == parquet.ConvertedType_UINT_64 {
@@ -221,16 +299,40 @@ func StrToParquetType(s string, pT *parquet.Type, cT *parquet.ConvertedType, len
221299
if *cT == parquet.ConvertedType_UTF8 {
222300
return s
223301

224-
} else if *cT == parquet.ConvertedType_INT_8 || *cT == parquet.ConvertedType_INT_16 || *cT == parquet.ConvertedType_INT_32 ||
225-
*cT == parquet.ConvertedType_DATE || *cT == parquet.ConvertedType_TIME_MILLIS {
302+
} else if *cT == parquet.ConvertedType_INT_8 {
303+
var v int8
304+
fmt.Sscanf(s, "%d", &v)
305+
return int32(v)
306+
307+
} else if *cT == parquet.ConvertedType_INT_16 {
308+
var v int16
309+
fmt.Sscanf(s, "%d", &v)
310+
return int32(v)
311+
312+
} else if *cT == parquet.ConvertedType_INT_32 {
226313
var v int32
227314
fmt.Sscanf(s, "%d", &v)
228-
return v
315+
return int32(v)
229316

230-
} else if *cT == parquet.ConvertedType_UINT_8 || *cT == parquet.ConvertedType_UINT_16 || *cT == parquet.ConvertedType_UINT_32 {
231-
var vt uint32
232-
fmt.Sscanf(s, "%d", &vt)
233-
return int32(vt)
317+
} else if *cT == parquet.ConvertedType_UINT_8 {
318+
var v uint8
319+
fmt.Sscanf(s, "%d", &v)
320+
return int32(v)
321+
322+
} else if *cT == parquet.ConvertedType_UINT_16 {
323+
var v uint16
324+
fmt.Sscanf(s, "%d", &v)
325+
return int32(v)
326+
327+
} else if *cT == parquet.ConvertedType_UINT_32 {
328+
var v uint32
329+
fmt.Sscanf(s, "%d", &v)
330+
return int32(v)
331+
332+
} else if *cT == parquet.ConvertedType_DATE || *cT == parquet.ConvertedType_TIME_MILLIS {
333+
var v int32
334+
fmt.Sscanf(s, "%d", &v)
335+
return int32(v)
234336

235337
} else if *cT == parquet.ConvertedType_UINT_64 {
236338
var vt uint64

0 commit comments

Comments
 (0)