Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 66 additions & 64 deletions arrow/datatype_binary.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,95 +27,97 @@ type OffsetTraits interface {
BytesRequired(int) int
}

// Package-level buffer layouts for the binary-like data types. The Layout
// methods of the corresponding types return these values directly instead of
// building a new DataTypeLayout on every call. Because a DataTypeLayout is
// returned by value but holds a slice (Buffers) and a pointer (VariadicSpec),
// every caller shares the same underlying storage and must treat the returned
// layout as read-only.
var (
// binaryTypeLayout: bitmap, fixed-width entries of Int32SizeBytes, variable-width buffer.
binaryTypeLayout = DataTypeLayout{
Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Int32SizeBytes), SpecVariableWidth()},
}
// stringTypeLayout has the same buffer specs as binaryTypeLayout.
stringTypeLayout = DataTypeLayout{
Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Int32SizeBytes), SpecVariableWidth()},
}
// largeBinaryTypeLayout: as binaryTypeLayout, but with Int64SizeBytes-wide fixed-width entries.
largeBinaryTypeLayout = DataTypeLayout{
Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Int64SizeBytes), SpecVariableWidth()},
}
// largeStringTypeLayout has the same buffer specs as largeBinaryTypeLayout.
largeStringTypeLayout = DataTypeLayout{
Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Int64SizeBytes), SpecVariableWidth()},
}

// variadic is the single variable-width spec that both view layouts point at
// through VariadicSpec.
variadic = SpecVariableWidth()

// binaryViewTypeLayout: bitmap plus fixed-width entries of ViewHeaderSizeBytes,
// with the shared variadic spec.
binaryViewTypeLayout = DataTypeLayout{
Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(ViewHeaderSizeBytes)},
VariadicSpec: &variadic,
}
// stringViewTypeLayout has the same buffer specs as binaryViewTypeLayout.
stringViewTypeLayout = DataTypeLayout{
Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(ViewHeaderSizeBytes)},
VariadicSpec: &variadic,
}
)

type BinaryType struct{}

func (t *BinaryType) ID() Type { return BINARY }
func (t *BinaryType) Name() string { return "binary" }
func (t *BinaryType) String() string { return "binary" }
func (t *BinaryType) binary() {}
func (t *BinaryType) Fingerprint() string { return typeFingerprint(t) }
func (t *BinaryType) Layout() DataTypeLayout {
return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(),
SpecFixedWidth(Int32SizeBytes), SpecVariableWidth()}}
}
func (t *BinaryType) ID() Type { return BINARY }
func (t *BinaryType) Name() string { return "binary" }
func (t *BinaryType) String() string { return "binary" }
func (t *BinaryType) binary() {}
func (t *BinaryType) Fingerprint() string { return typeFingerprint(t) }
func (t *BinaryType) Layout() DataTypeLayout { return binaryTypeLayout }
func (t *BinaryType) OffsetTypeTraits() OffsetTraits { return Int32Traits }
func (BinaryType) IsUtf8() bool { return false }

type StringType struct{}

func (t *StringType) ID() Type { return STRING }
func (t *StringType) Name() string { return "utf8" }
func (t *StringType) String() string { return "utf8" }
func (t *StringType) binary() {}
func (t *StringType) Fingerprint() string { return typeFingerprint(t) }
func (t *StringType) Layout() DataTypeLayout {
return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(),
SpecFixedWidth(Int32SizeBytes), SpecVariableWidth()}}
}
func (t *StringType) ID() Type { return STRING }
func (t *StringType) Name() string { return "utf8" }
func (t *StringType) String() string { return "utf8" }
func (t *StringType) binary() {}
func (t *StringType) Fingerprint() string { return typeFingerprint(t) }
func (t *StringType) Layout() DataTypeLayout { return stringTypeLayout }
func (t *StringType) OffsetTypeTraits() OffsetTraits { return Int32Traits }
func (StringType) IsUtf8() bool { return true }

type LargeBinaryType struct{}

func (t *LargeBinaryType) ID() Type { return LARGE_BINARY }
func (t *LargeBinaryType) Name() string { return "large_binary" }
func (t *LargeBinaryType) String() string { return "large_binary" }
func (t *LargeBinaryType) binary() {}
func (t *LargeBinaryType) Fingerprint() string { return typeFingerprint(t) }
func (t *LargeBinaryType) Layout() DataTypeLayout {
return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(),
SpecFixedWidth(Int64SizeBytes), SpecVariableWidth()}}
}
func (t *LargeBinaryType) ID() Type { return LARGE_BINARY }
func (t *LargeBinaryType) Name() string { return "large_binary" }
func (t *LargeBinaryType) String() string { return "large_binary" }
func (t *LargeBinaryType) binary() {}
func (t *LargeBinaryType) Fingerprint() string { return typeFingerprint(t) }
func (t *LargeBinaryType) Layout() DataTypeLayout { return largeBinaryTypeLayout }
func (t *LargeBinaryType) OffsetTypeTraits() OffsetTraits { return Int64Traits }
func (LargeBinaryType) IsUtf8() bool { return false }

type LargeStringType struct{}

func (t *LargeStringType) ID() Type { return LARGE_STRING }
func (t *LargeStringType) Name() string { return "large_utf8" }
func (t *LargeStringType) String() string { return "large_utf8" }
func (t *LargeStringType) binary() {}
func (t *LargeStringType) Fingerprint() string { return typeFingerprint(t) }
func (t *LargeStringType) Layout() DataTypeLayout {
return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(),
SpecFixedWidth(Int64SizeBytes), SpecVariableWidth()}}
}
func (t *LargeStringType) ID() Type { return LARGE_STRING }
func (t *LargeStringType) Name() string { return "large_utf8" }
func (t *LargeStringType) String() string { return "large_utf8" }
func (t *LargeStringType) binary() {}
func (t *LargeStringType) Fingerprint() string { return typeFingerprint(t) }
func (t *LargeStringType) Layout() DataTypeLayout { return largeStringTypeLayout }
func (t *LargeStringType) OffsetTypeTraits() OffsetTraits { return Int64Traits }
func (LargeStringType) IsUtf8() bool { return true }

type BinaryViewType struct{}

func (*BinaryViewType) ID() Type { return BINARY_VIEW }
func (*BinaryViewType) Name() string { return "binary_view" }
func (*BinaryViewType) String() string { return "binary_view" }
func (*BinaryViewType) IsUtf8() bool { return false }
func (*BinaryViewType) binary() {}
func (*BinaryViewType) view() {}
func (t *BinaryViewType) Fingerprint() string { return typeFingerprint(t) }
func (*BinaryViewType) Layout() DataTypeLayout {
variadic := SpecVariableWidth()
return DataTypeLayout{
Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(ViewHeaderSizeBytes)},
VariadicSpec: &variadic,
}
}
func (*BinaryViewType) ID() Type { return BINARY_VIEW }
func (*BinaryViewType) Name() string { return "binary_view" }
func (*BinaryViewType) String() string { return "binary_view" }
func (*BinaryViewType) IsUtf8() bool { return false }
func (*BinaryViewType) binary() {}
func (*BinaryViewType) view() {}
func (t *BinaryViewType) Fingerprint() string { return typeFingerprint(t) }
func (*BinaryViewType) Layout() DataTypeLayout { return binaryViewTypeLayout }

type StringViewType struct{}

func (*StringViewType) ID() Type { return STRING_VIEW }
func (*StringViewType) Name() string { return "string_view" }
func (*StringViewType) String() string { return "string_view" }
func (*StringViewType) IsUtf8() bool { return true }
func (*StringViewType) binary() {}
func (*StringViewType) view() {}
func (t *StringViewType) Fingerprint() string { return typeFingerprint(t) }
func (*StringViewType) Layout() DataTypeLayout {
variadic := SpecVariableWidth()
return DataTypeLayout{
Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(ViewHeaderSizeBytes)},
VariadicSpec: &variadic,
}
}
func (*StringViewType) ID() Type { return STRING_VIEW }
func (*StringViewType) Name() string { return "string_view" }
func (*StringViewType) String() string { return "string_view" }
func (*StringViewType) IsUtf8() bool { return true }
func (*StringViewType) binary() {}
func (*StringViewType) view() {}
func (t *StringViewType) Fingerprint() string { return typeFingerprint(t) }
func (*StringViewType) Layout() DataTypeLayout { return stringViewTypeLayout }

var (
BinaryTypes = struct {
Expand Down
50 changes: 37 additions & 13 deletions arrow/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,26 +169,38 @@ func NewSchema(fields []Field, metadata *Metadata) *Schema {
}

// NewSchemaWithEndian returns a new Schema with the given fields, metadata
// and endianness.
//
// The fields slice is copied into a freshly allocated slice and, when metadata
// is non-nil, the result of metadata.clone() is used, so the Schema does not
// retain the caller's slice or Metadata value. A nil metadata is passed
// through as nil. Construction is delegated to newSchema, which panics if any
// field has a nil Type.
func NewSchemaWithEndian(fields []Field, metadata *Metadata, e endian.Endianness) *Schema {
	// Always non-nil with len == cap, exactly like make+copy.
	ownFields := append(make([]Field, 0, len(fields)), fields...)

	if metadata == nil {
		return newSchema(ownFields, nil, e)
	}

	ownMeta := metadata.clone()
	return newSchema(ownFields, &ownMeta, e)
}

// newSchema builds a Schema directly from the given fields and metadata
// without copying either: the fields slice is stored as-is (sharing its
// backing array with the caller) and *metadata is assigned by value. Callers
// that need isolation from their inputs must copy before calling.
//
// It also builds the name -> indices lookup, recording every position at which
// a field name occurs, so duplicate names are supported.
//
// newSchema panics if any field has a nil Type.
func newSchema(fields []Field, metadata *Metadata, e endian.Endianness) *Schema {
	sc := &Schema{
		// Clip the capacity to the length. Code such as AddField appends to the
		// stored slice; without the clip, two appends made through Schemas that
		// share one backing array with spare capacity would overwrite each
		// other's new element. With cap == len, append always reallocates.
		fields:     fields[:len(fields):len(fields)],
		index:      make(map[string][]int, len(fields)),
		endianness: e,
	}
	if metadata != nil {
		sc.meta = *metadata
	}
	for i, field := range fields {
		if field.Type == nil {
			panic("arrow: field with nil DataType")
		}
		sc.index[field.Name] = append(sc.index[field.Name], i)
	}
	return sc
}

// WithEndianness returns a new Schema that has the same fields and metadata
// as sc but reports endianness e.
//
// The result is built with newSchema, so it shares sc's field slice and
// metadata value instead of copying them; only the name index is rebuilt.
// NOTE(review): this relies on a Schema's fields and metadata never being
// mutated after construction — confirm against the rest of the package.
func (sc *Schema) WithEndianness(e endian.Endianness) *Schema {
	return newSchema(sc.fields, &sc.meta, e)
}

func (sc *Schema) Endianness() endian.Endianness { return sc.endianness }
Expand All @@ -207,11 +219,17 @@ func (sc *Schema) FieldsByName(n string) ([]Field, bool) {
if !ok {
return nil, ok
}
fields := make([]Field, 0, len(indices))
for _, v := range indices {
fields = append(fields, sc.fields[v])
if len(indices) == 1 {
return sc.fields[indices[0] : indices[0]+1], ok
} else if len(indices) > 1 {
fields := make([]Field, 0, len(indices))
for _, v := range indices {
fields = append(fields, sc.fields[v])
}
return fields, ok
}
return fields, ok

return nil, false
}

// FieldIndices returns the indices of the named field or nil.
Expand Down Expand Up @@ -250,11 +268,17 @@ func (s *Schema) AddField(i int, field Field) (*Schema, error) {
return nil, fmt.Errorf("arrow: invalid field index %d", i)
}

fields := make([]Field, len(s.fields)+1)
copy(fields[:i], s.fields[:i])
fields[i] = field
copy(fields[i+1:], s.fields[i:])
return NewSchema(fields, &s.meta), nil
var fields []Field
if i == len(s.fields) {
fields = append(s.fields, field)
} else {
fields = make([]Field, len(s.fields)+1)
copy(fields[:i], s.fields[:i])
fields[i] = field
copy(fields[i+1:], s.fields[i:])
}

return newSchema(fields, &s.meta, s.endianness), nil
}

func (s *Schema) String() string {
Expand Down