Skip to content

Commit 88a963c

Browse files
committed
feat[wip]: implement XLSX file type
1 parent 82e4362 commit 88a963c

File tree

11 files changed

+440
-2
lines changed

11 files changed

+440
-2
lines changed

client.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
jsonfile "github.com/cloudquery/filetypes/v4/json"
66
"github.com/cloudquery/filetypes/v4/parquet"
77
"github.com/cloudquery/filetypes/v4/types"
8+
"github.com/cloudquery/filetypes/v4/xlsx"
89
)
910

1011
type Client struct {
@@ -17,6 +18,7 @@ var (
1718
_ types.FileType = (*csvfile.Client)(nil)
1819
_ types.FileType = (*jsonfile.Client)(nil)
1920
_ types.FileType = (*parquet.Client)(nil)
21+
_ types.FileType = (*xlsx.Client)(nil)
2022
)
2123

2224
// NewClient creates a new client for the given spec
@@ -49,6 +51,9 @@ func NewClient(spec *FileSpec) (*Client, error) {
4951
case FormatTypeParquet:
5052
client, err = parquet.NewClient(parquet.WithSpec(*spec.parquetSpec))
5153

54+
case FormatTypeXLSX:
55+
client, err = xlsx.NewClient()
56+
5257
default:
5358
// shouldn't be possible as Validate checks for type
5459
panic("unknown format " + spec.Format)

go.mod

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ require (
1111
github.com/invopop/jsonschema v0.13.0
1212
github.com/stretchr/testify v1.10.0
1313
github.com/wk8/go-ordered-map/v2 v2.1.8
14+
github.com/xuri/excelize/v2 v2.9.1
1415
)
1516

1617
require (
@@ -38,11 +39,17 @@ require (
3839
github.com/oapi-codegen/runtime v1.1.1 // indirect
3940
github.com/pierrec/lz4/v4 v4.1.22 // indirect
4041
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
42+
github.com/richardlehane/mscfb v1.0.4 // indirect
43+
github.com/richardlehane/msoleps v1.0.4 // indirect
4144
github.com/rs/zerolog v1.34.0 // indirect
4245
github.com/samber/lo v1.49.1 // indirect
4346
github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 // indirect
4447
github.com/thoas/go-funk v0.9.3 // indirect
48+
github.com/tiendc/go-deepcopy v1.6.0 // indirect
49+
github.com/xuri/efp v0.0.1 // indirect
50+
github.com/xuri/nfp v0.0.1 // indirect
4551
github.com/zeebo/xxh3 v1.0.2 // indirect
52+
golang.org/x/crypto v0.39.0 // indirect
4653
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b // indirect
4754
golang.org/x/mod v0.25.0 // indirect
4855
golang.org/x/net v0.41.0 // indirect

go.sum

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,11 @@ github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE
8888
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
8989
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
9090
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
91+
github.com/richardlehane/mscfb v1.0.4 h1:WULscsljNPConisD5hR0+OyZjwK46Pfyr6mPu5ZawpM=
92+
github.com/richardlehane/mscfb v1.0.4/go.mod h1:YzVpcZg9czvAuhk9T+a3avCpcFPMUWm7gK3DypaEsUk=
93+
github.com/richardlehane/msoleps v1.0.1/go.mod h1:BWev5JBpU9Ko2WAgmZEuiz4/u3ZYTKbjLycmwiWUfWg=
94+
github.com/richardlehane/msoleps v1.0.4 h1:WuESlvhX3gH2IHcd8UqyCuFY5yiq/GR/yqaSM/9/g00=
95+
github.com/richardlehane/msoleps v1.0.4/go.mod h1:BWev5JBpU9Ko2WAgmZEuiz4/u3ZYTKbjLycmwiWUfWg=
9196
github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0=
9297
github.com/rs/zerolog v1.34.0 h1:k43nTLIwcTVQAncfCw4KZ2VY6ukYoZaBPNOE8txlOeY=
9398
github.com/rs/zerolog v1.34.0/go.mod h1:bJsvje4Z08ROH4Nhs5iH600c3IkWhwp44iRc54W6wYQ=
@@ -107,8 +112,16 @@ github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOf
107112
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
108113
github.com/thoas/go-funk v0.9.3 h1:7+nAEx3kn5ZJcnDm2Bh23N2yOtweO14bi//dvRtgLpw=
109114
github.com/thoas/go-funk v0.9.3/go.mod h1:+IWnUfUmFO1+WVYQWQtIJHeRRdaIyyYglZN7xzUPe4Q=
115+
github.com/tiendc/go-deepcopy v1.6.0 h1:0UtfV/imoCwlLxVsyfUd4hNHnB3drXsfle+wzSCA5Wo=
116+
github.com/tiendc/go-deepcopy v1.6.0/go.mod h1:toXoeQoUqXOOS/X4sKuiAoSk6elIdqc0pN7MTgOOo2I=
110117
github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/fJgbpc=
111118
github.com/wk8/go-ordered-map/v2 v2.1.8/go.mod h1:5nJHM5DyteebpVlHnWMV0rPz6Zp7+xBAnxjb1X5vnTw=
119+
github.com/xuri/efp v0.0.1 h1:fws5Rv3myXyYni8uwj2qKjVaRP30PdjeYe2Y6FDsCL8=
120+
github.com/xuri/efp v0.0.1/go.mod h1:ybY/Jr0T0GTCnYjKqmdwxyxn2BQf2RcQIIvex5QldPI=
121+
github.com/xuri/excelize/v2 v2.9.1 h1:VdSGk+rraGmgLHGFaGG9/9IWu1nj4ufjJ7uwMDtj8Qw=
122+
github.com/xuri/excelize/v2 v2.9.1/go.mod h1:x7L6pKz2dvo9ejrRuD8Lnl98z4JLt0TGAwjhW+EiP8s=
123+
github.com/xuri/nfp v0.0.1 h1:MDamSGatIvp8uOmDP8FnmjuQpu90NzdJxo7242ANR9Q=
124+
github.com/xuri/nfp v0.0.1/go.mod h1:WwHg+CVyzlv/TX9xqBFXEZAuxOPxn2k1GNHwG41IIUQ=
112125
github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU=
113126
github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
114127
github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ=
@@ -127,8 +140,12 @@ go.opentelemetry.io/otel/sdk/metric v1.37.0 h1:90lI228XrB9jCMuSdA0673aubgRobVZFh
127140
go.opentelemetry.io/otel/sdk/metric v1.37.0/go.mod h1:cNen4ZWfiD37l5NhS+Keb5RXVWZWpRE+9WyVCpbo5ps=
128141
go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4=
129142
go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0=
143+
golang.org/x/crypto v0.39.0 h1:SHs+kF4LP+f+p14esP5jAoDpHU8Gu/v9lFRK6IT5imM=
144+
golang.org/x/crypto v0.39.0/go.mod h1:L+Xg3Wf6HoL4Bn4238Z6ft6KfEpN0tJGo53AAPC632U=
130145
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b h1:M2rDM6z3Fhozi9O7NWsxAkg/yqS/lQJ6PmkyIV3YP+o=
131146
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b/go.mod h1:3//PLf8L/X+8b4vuAfHzxeRUl04Adcb341+IGKfnqS8=
147+
golang.org/x/image v0.25.0 h1:Y6uW6rH1y5y/LK1J8BPWZtr6yZ7hrsy6hFrXjgsc2fQ=
148+
golang.org/x/image v0.25.0/go.mod h1:tCAmOEGthTtkalusGp1g3xa2gke8J6c2N565dTyl9Rs=
132149
golang.org/x/mod v0.25.0 h1:n7a+ZbQKQA/Ysbyb0/6IbB1H/X41mKgbhfv7AfG/44w=
133150
golang.org/x/mod v0.25.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww=
134151
golang.org/x/net v0.41.0 h1:vBTly1HeNPEn3wtREYfy4GZ/NECgw2Cnl+nK6Nz3uvw=

spec.go

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"github.com/cloudquery/filetypes/v4/csv"
1010
jsonfile "github.com/cloudquery/filetypes/v4/json"
1111
"github.com/cloudquery/filetypes/v4/parquet"
12+
"github.com/cloudquery/filetypes/v4/xlsx"
1213
)
1314

1415
type FormatType string
@@ -17,6 +18,7 @@ const (
1718
FormatTypeCSV = "csv"
1819
FormatTypeJSON = "json"
1920
FormatTypeParquet = "parquet"
21+
FormatTypeXLSX = "xlsx"
2022
)
2123

2224
// Compression type.
@@ -41,6 +43,7 @@ type FileSpec struct {
4143
csvSpec *csv.CSVSpec
4244
jsonSpec *jsonfile.JSONSpec
4345
parquetSpec *parquet.ParquetSpec
46+
xlsxSpec *xlsx.Spec
4447
}
4548

4649
func (s *FileSpec) SetDefaults() {
@@ -51,6 +54,8 @@ func (s *FileSpec) SetDefaults() {
5154
s.jsonSpec.SetDefaults()
5255
case FormatTypeParquet:
5356
s.parquetSpec.SetDefaults()
57+
case FormatTypeXLSX:
58+
s.xlsxSpec.SetDefaults()
5459
}
5560
}
5661

@@ -68,10 +73,14 @@ func (s *FileSpec) Validate() error {
6873
return s.jsonSpec.Validate()
6974
case FormatTypeParquet:
7075
if s.Compression != CompressionTypeNone {
71-
return errors.New("compression is not supported for parquet format") // This won't work even if we wanted to, because parquet writer prematurely closes the file handle
76+
return fmt.Errorf("compression is not supported for the %s format", s.Format)
7277
}
73-
7478
return s.parquetSpec.Validate()
79+
case FormatTypeXLSX:
80+
if s.Compression != CompressionTypeNone {
81+
return fmt.Errorf("compression is not supported for the %s format", s.Format)
82+
}
83+
return s.xlsxSpec.Validate()
7584
default:
7685
return fmt.Errorf("unknown format %s", s.Format)
7786
}
@@ -96,6 +105,9 @@ func (s *FileSpec) UnmarshalSpec() error {
96105
case FormatTypeParquet:
97106
s.parquetSpec = &parquet.ParquetSpec{}
98107
return dec.Decode(s.parquetSpec)
108+
case FormatTypeXLSX:
109+
s.xlsxSpec = &xlsx.Spec{}
110+
return dec.Decode(s.xlsxSpec)
99111
default:
100112
return fmt.Errorf("unknown format %s", s.Format)
101113
}

xlsx/client.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
package xlsx
2+
3+
// Options is a functional option applied to a Client by NewClient.
type Options func(*Client)
4+
5+
// Client is an XLSX file type client.
type Client struct {
}
8+
9+
func NewClient(options ...Options) (*Client, error) {
10+
c := &Client{}
11+
12+
for _, option := range options {
13+
option(c)
14+
}
15+
16+
return c, nil
17+
}

xlsx/read.go

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
package xlsx
2+
3+
import (
4+
"bytes"
5+
"fmt"
6+
7+
"github.com/apache/arrow-go/v18/arrow"
8+
"github.com/apache/arrow-go/v18/arrow/array"
9+
"github.com/apache/arrow-go/v18/arrow/memory"
10+
"github.com/cloudquery/filetypes/v4/types"
11+
"github.com/cloudquery/plugin-sdk/v4/schema"
12+
"github.com/goccy/go-json"
13+
"github.com/xuri/excelize/v2"
14+
)
15+
16+
func (cl *Client) Read(r types.ReaderAtSeeker, table *schema.Table, res chan<- arrow.Record) error {
17+
file, err := excelize.OpenReader(r)
18+
if err != nil {
19+
return fmt.Errorf("failed to open xlsx reader: %w", err)
20+
}
21+
22+
sheetName := "data"
23+
rows, err := file.GetRows(sheetName)
24+
if err != nil {
25+
return fmt.Errorf("failed to get rows from sheet %s: %w", sheetName, err)
26+
}
27+
28+
for _, row := range rows {
29+
rb := array.NewRecordBuilder(memory.DefaultAllocator, table.ToArrowSchema())
30+
for i, field := range rb.Fields() {
31+
err := appendValue(field, row[i])
32+
if err != nil {
33+
return fmt.Errorf("failed to read from sheet %s: %w", table.Name, err)
34+
}
35+
}
36+
res <- rb.NewRecord()
37+
}
38+
return nil
39+
}
40+
41+
func appendValue(builder array.Builder, value any) error {
42+
if value == nil {
43+
builder.AppendNull()
44+
return nil
45+
}
46+
switch bldr := builder.(type) {
47+
case array.ListLikeBuilder:
48+
lst := value.([]any)
49+
if lst == nil {
50+
bldr.AppendNull()
51+
return nil
52+
}
53+
bldr.Append(true)
54+
valBuilder := bldr.ValueBuilder()
55+
for _, v := range lst {
56+
if err := appendValue(valBuilder, v); err != nil {
57+
return err
58+
}
59+
}
60+
return nil
61+
case *array.StructBuilder:
62+
m := value.(map[string]any)
63+
bldr.Append(true)
64+
bldrType := bldr.Type().(*arrow.StructType)
65+
for k, v := range m {
66+
idx, _ := bldrType.FieldIdx(k)
67+
fieldBldr := bldr.FieldBuilder(idx)
68+
if err := appendValue(fieldBldr, v); err != nil {
69+
return err
70+
}
71+
}
72+
return nil
73+
case *array.MonthIntervalBuilder, *array.DayTimeIntervalBuilder, *array.MonthDayNanoIntervalBuilder:
74+
b, err := json.Marshal(value)
75+
if err != nil {
76+
return err
77+
}
78+
dec := json.NewDecoder(bytes.NewReader(b))
79+
return bldr.UnmarshalOne(dec)
80+
case *array.Int8Builder, *array.Int16Builder, *array.Int32Builder, *array.Int64Builder:
81+
return bldr.AppendValueFromString(fmt.Sprintf("%d", int64(value.(float64))))
82+
case *array.Uint8Builder, *array.Uint16Builder, *array.Uint32Builder, *array.Uint64Builder:
83+
return bldr.AppendValueFromString(fmt.Sprintf("%d", uint64(value.(float64))))
84+
}
85+
return builder.AppendValueFromString(fmt.Sprintf("%v", value))
86+
}

xlsx/spec.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
package xlsx
2+
3+
import (
4+
"github.com/invopop/jsonschema"
5+
)
6+
7+
// Spec is the XLSX file type spec. It currently has no fields.
type Spec struct{}
8+
9+
func (Spec) JSONSchema() *jsonschema.Schema {
10+
properties := jsonschema.NewProperties()
11+
return &jsonschema.Schema{
12+
Description: "CloudQuery XLSX file output spec.",
13+
Properties: properties,
14+
Type: "object",
15+
AdditionalProperties: jsonschema.FalseSchema, // "additionalProperties": false
16+
}
17+
}
18+
19+
// SetDefaults is a no-op: the XLSX spec has no fields to default.
func (*Spec) SetDefaults() {}
20+
21+
func (s *Spec) Validate() error {
22+
return nil
23+
}

xlsx/spec_test.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
package xlsx
2+
3+
import (
4+
"testing"
5+
6+
"github.com/cloudquery/codegen/jsonschema"
7+
"github.com/stretchr/testify/require"
8+
)
9+
10+
func TestSpec_JSONSchema(t *testing.T) {
11+
schema, err := jsonschema.Generate(Spec{})
12+
require.NoError(t, err)
13+
14+
jsonschema.TestJSONSchema(t, string(schema), []jsonschema.TestCase{
15+
{
16+
Name: "empty",
17+
Spec: `{}`,
18+
},
19+
{
20+
Name: "extra keyword",
21+
Err: true,
22+
Spec: `{"extra":true}`,
23+
},
24+
})
25+
}
26.1 KB
Binary file not shown.

0 commit comments

Comments
 (0)