Skip to content

Commit 63ed221

Browse files
authored
fix LOAD DATA 64K buffer limit (#2709)
1 parent 9aafbeb commit 63ed221

File tree

7 files changed

+50
-25
lines changed

7 files changed

+50
-25
lines changed

.gitattributes

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,5 @@ enginetest/testdata/test5.txt binary
77
enginetest/testdata/test6.csv binary
88
enginetest/testdata/test7.txt binary
99
enginetest/testdata/test8.txt binary
10-
enginetest/testdata/test9.txt binary
10+
enginetest/testdata/test9.txt binary
11+
enginetest/testdata/test10.txt binary

enginetest/queries/load_queries.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -539,6 +539,23 @@ var LoadDataScripts = []ScriptTest{
539539
},
540540
},
541541
},
542+
{
543+
Name: "LOAD DATA with column data larger than 64KB",
544+
SetUpScript: []string{
545+
"create table t(id int primary key, lt longtext);",
546+
"load data infile './testdata/test10.txt' into table t fields terminated by ',' lines terminated by '\n';",
547+
},
548+
Assertions: []ScriptTestAssertion{
549+
{
550+
Query: "select id, length(lt) from t order by id",
551+
Expected: []sql.Row{
552+
{1, 65535},
553+
{2, 100000},
554+
{3, 1000000},
555+
},
556+
},
557+
},
558+
},
542559
}
543560

544561
var LoadDataErrorScripts = []ScriptTest{

enginetest/testdata/test10.txt

Lines changed: 3 additions & 0 deletions
Large diffs are not rendered by default.

sql/plan/load_data.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
package plan
1616

1717
import (
18-
"strings"
18+
"bytes"
1919

2020
"github.com/dolthub/vitess/go/vt/sqlparser"
2121

@@ -76,7 +76,7 @@ func (l *LoadData) SplitLines(data []byte, atEOF bool) (advance int, token []byt
7676
}
7777

7878
// Find the index of the LINES TERMINATED BY delim.
79-
if i := strings.Index(string(data), l.LinesTerminatedBy); i >= 0 {
79+
if i := bytes.Index(data, []byte(l.LinesTerminatedBy)); i >= 0 {
8080
return i + len(l.LinesTerminatedBy), data[0:i], nil
8181
}
8282

sql/rowexec/ddl.go

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -94,19 +94,9 @@ func (b *BaseBuilder) buildLoadData(ctx *sql.Context, n *plan.LoadData, row sql.
9494
}
9595

9696
scanner := bufio.NewScanner(reader)
97+
scanner.Buffer(nil, int(types.LongTextBlobMax))
9798
scanner.Split(n.SplitLines)
9899

99-
// Skip through the lines that need to be ignored.
100-
for n.IgnoreNum > 0 && scanner.Scan() {
101-
scanner.Text()
102-
n.IgnoreNum--
103-
}
104-
105-
if scanner.Err() != nil {
106-
reader.Close()
107-
return nil, scanner.Err()
108-
}
109-
110100
sch := n.Schema()
111101
source := sch[0].Source // Schema will always have at least one column
112102
colNames := n.ColNames
@@ -136,6 +126,8 @@ func (b *BaseBuilder) buildLoadData(ctx *sql.Context, n *plan.LoadData, row sql.
136126
setExprs: n.SetExprs,
137127
userVars: n.UserVars,
138128

129+
ignoreNum: n.IgnoreNum,
130+
139131
fieldsTerminatedBy: n.FieldsTerminatedBy,
140132
fieldsEnclosedBy: n.FieldsEnclosedBy,
141133
fieldsEnclosedByOpt: n.FieldsEnclosedByOpt,

sql/rowexec/ddl_iters.go

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ type loadDataIter struct {
4747
setExprs []sql.Expression
4848
userVars []sql.Expression
4949

50+
ignoreNum int64
51+
5052
fieldsTerminatedBy string
5153
fieldsEnclosedBy string
5254
fieldsEnclosedByOpt bool
@@ -56,21 +58,30 @@ type loadDataIter struct {
5658
linesTerminatedBy string
5759
}
5860

59-
func (l loadDataIter) Next(ctx *sql.Context) (returnRow sql.Row, returnErr error) {
60-
var exprs []sql.Expression
61+
var _ sql.RowIter = (*loadDataIter)(nil)
62+
var _ sql.Closer = (*loadDataIter)(nil)
63+
64+
func (l *loadDataIter) Next(ctx *sql.Context) (returnRow sql.Row, returnErr error) {
65+
// skip first ignoreNum lines
6166
var err error
67+
for ; l.ignoreNum > 0 && l.scanner.Scan(); l.ignoreNum-- {
68+
if err = l.scanner.Err(); err != nil {
69+
l.reader.Close()
70+
return nil, err
71+
}
72+
}
73+
6274
// If exprs is nil then this is a skipped line (see test cases). Keep skipping
6375
// until exprs != nil
76+
var exprs []sql.Expression
6477
for exprs == nil {
6578
if keepGoing := l.scanner.Scan(); !keepGoing {
66-
if l.scanner.Err() != nil {
67-
return nil, l.scanner.Err()
79+
if err = l.scanner.Err(); err != nil {
80+
return nil, err
6881
}
6982
return nil, io.EOF
7083
}
71-
72-
line := l.scanner.Text()
73-
exprs, err = l.parseFields(ctx, line)
84+
exprs, err = l.parseFields(ctx, l.scanner.Text())
7485
if err != nil {
7586
return nil, err
7687
}
@@ -101,12 +112,12 @@ func (l loadDataIter) Next(ctx *sql.Context) (returnRow sql.Row, returnErr error
101112
return sql.NewRow(row...), nil
102113
}
103114

104-
func (l loadDataIter) Close(ctx *sql.Context) error {
115+
func (l *loadDataIter) Close(ctx *sql.Context) error {
105116
return l.reader.Close()
106117
}
107118

108119
// parseLinePrefix searches for the delim defined by linesStartingByDelim.
109-
func (l loadDataIter) parseLinePrefix(line string) string {
120+
func (l *loadDataIter) parseLinePrefix(line string) string {
110121
if l.linesStartingBy == "" {
111122
return line
112123
}
@@ -121,7 +132,7 @@ func (l loadDataIter) parseLinePrefix(line string) string {
121132
}
122133
}
123134

124-
func (l loadDataIter) parseFields(ctx *sql.Context, line string) ([]sql.Expression, error) {
135+
func (l *loadDataIter) parseFields(ctx *sql.Context, line string) ([]sql.Expression, error) {
125136
// Step 1. Start by Searching for prefix if there is one
126137
line = l.parseLinePrefix(line)
127138
if line == "" {

sql/types/typecheck_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,9 @@ package types
1717
import (
1818
"testing"
1919

20-
"github.com/dolthub/go-mysql-server/sql"
2120
"github.com/stretchr/testify/assert"
21+
22+
"github.com/dolthub/go-mysql-server/sql"
2223
)
2324

2425
func TestIsGeometry(t *testing.T) {

0 commit comments

Comments
 (0)