Skip to content

Commit d69fe91

Browse files
committed
importer: fix collated string imports for MySQL/DELIMITED DATA
We previously special cased `ParseDatumStringWithRawBytes` for MySQL related imports, which was buggy for collated strings as well. Instead, make import have a specialised method for converting MySQL literals to the relevant data type without compromising on collated strings. Release note (bug fix): Previously, using IMPORT INTO for DELIMITED DATA or MySQL imports would error with `column ... does not exist` if it was importing into a collated string column.
1 parent 9d6232e commit d69fe91

File tree

5 files changed

+29
-27
lines changed

5 files changed

+29
-27
lines changed

pkg/sql/importer/import_stmt_test.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5730,6 +5730,16 @@ func TestImportDelimited(t *testing.T) {
57305730
sqlDB.Exec(t, intoCmd, opts...)
57315731
checkQueryResults(t, fmt.Sprintf(`SELECT i, s, b FROM into%d ORDER BY i`, i))
57325732
})
5733+
t.Run("import-into-collated", func(t *testing.T) {
5734+
defer sqlDB.Exec(t, fmt.Sprintf(`DROP TABLE into%d`, i))
5735+
sqlDB.Exec(t, fmt.Sprintf(`CREATE TABLE into%d (i INT8 PRIMARY KEY, s text COLLATE "en_US", b bytea)`, i))
5736+
intoCmd := fmt.Sprintf(`IMPORT INTO into%d (i, s, b) DELIMITED DATA ($1)`, i)
5737+
if len(flags) > 0 {
5738+
intoCmd += " WITH " + strings.Join(flags, ", ")
5739+
}
5740+
sqlDB.Exec(t, intoCmd, opts...)
5741+
checkQueryResults(t, fmt.Sprintf(`SELECT i, s, b FROM into%d ORDER BY i`, i))
5742+
})
57335743
t.Run("import-into-target-cols-reordered", func(t *testing.T) {
57345744
defer sqlDB.Exec(t, fmt.Sprintf(`DROP TABLE into%d`, i))
57355745
sqlDB.Exec(t, fmt.Sprintf(`CREATE TABLE into%d (b bytea, i INT8 PRIMARY KEY, s text)`, i))

pkg/sql/importer/read_import_mysql.go

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,19 @@ const (
206206
zeroTime = "0000-00-00 00:00:00"
207207
)
208208

209+
func mysqlStrToDatum(evalCtx *eval.Context, s string, desired *types.T) (tree.Datum, error) {
210+
switch desired.Family() {
211+
case types.BytesFamily:
212+
// mysql emits raw byte strings that do not use the same escaping as our ParseBytes
213+
// function expects, and the difference between ParseStringAs and
214+
// ParseDatumStringAs is whether or not it attempts to parse bytes.
215+
return tree.NewDBytes(tree.DBytes(s)), nil
216+
default:
217+
res, _, err := tree.ParseAndRequireString(desired, s, evalCtx)
218+
return res, err
219+
}
220+
}
221+
209222
// mysqlValueToDatum attempts to convert a value, as parsed from a mysqldump
210223
// INSERT statement, in to a Cockroach Datum of type `desired`. The MySQL parser
211224
// does not parse the values themselves to Go primitivies, rather leaving the
@@ -239,11 +252,7 @@ func mysqlValueToDatum(
239252
}
240253
}
241254
}
242-
// This uses ParseDatumStringAsWithRawBytes instead of ParseDatumStringAs since mysql emits
243-
// raw byte strings that do not use the same escaping as our ParseBytes
244-
// function expects, and the difference between ParseStringAs and
245-
// ParseDatumStringAs is whether or not it attempts to parse bytes.
246-
return rowenc.ParseDatumStringAsWithRawBytes(ctx, desired, s, evalContext)
255+
return mysqlStrToDatum(evalContext, s, desired)
247256
case mysql.IntVal:
248257
return rowenc.ParseDatumStringAs(ctx, desired, string(v.Val), evalContext)
249258
case mysql.FloatVal:

pkg/sql/importer/read_import_mysql_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ import (
4343
"github.com/cockroachdb/cockroach/pkg/util/log"
4444
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
4545
"github.com/kr/pretty"
46+
"github.com/stretchr/testify/require"
4647
mysql "vitess.io/vitess/go/vt/sqlparser"
4748
)
4849

@@ -356,13 +357,16 @@ func TestMysqlValueToDatum(t *testing.T) {
356357
}
357358
return d
358359
}
360+
asdfCS, err := tree.NewDCollatedString("Asdf", "en_US", &tree.CollationEnvironment{})
361+
require.NoError(t, err)
359362
tests := []struct {
360363
raw mysql.Expr
361364
typ *types.T
362365
want tree.Datum
363366
}{
364367
{raw: mysql.NewStrLiteral([]byte("0000-00-00")), typ: types.Date, want: tree.DNull},
365368
{raw: mysql.NewStrLiteral([]byte("2010-01-01")), typ: types.Date, want: date("2010-01-01")},
369+
{raw: mysql.NewStrLiteral([]byte("Asdf")), typ: types.MakeCollatedString(types.String, "en_US"), want: asdfCS},
366370
{raw: mysql.NewStrLiteral([]byte("0000-00-00 00:00:00")), typ: types.Timestamp, want: tree.DNull},
367371
{raw: mysql.NewStrLiteral([]byte("2010-01-01 00:00:00")), typ: types.Timestamp, want: ts("2010-01-01 00:00:00")},
368372
}

pkg/sql/importer/read_import_mysqlout.go

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ import (
2323
"github.com/cockroachdb/cockroach/pkg/security/username"
2424
"github.com/cockroachdb/cockroach/pkg/sql/catalog"
2525
"github.com/cockroachdb/cockroach/pkg/sql/row"
26-
"github.com/cockroachdb/cockroach/pkg/sql/rowenc"
2726
"github.com/cockroachdb/cockroach/pkg/sql/sem/eval"
2827
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
2928
"github.com/cockroachdb/cockroach/pkg/util/ctxgroup"
@@ -258,12 +257,8 @@ func (d *delimitedConsumer) FillDatums(
258257
} else if (!d.opts.HasEscape && field == "NULL") || d.opts.NullEncoding != nil && field == *d.opts.NullEncoding {
259258
datum = tree.DNull
260259
} else {
261-
// This uses ParseDatumStringAsWithRawBytes instead of ParseDatumStringAs since mysql emits
262-
// raw byte strings that do not use the same escaping as our ParseBytes
263-
// function expects, and the difference between ParseStringAs and
264-
// ParseDatumStringAs is whether or not it attempts to parse bytes.
265260
var err error
266-
datum, err = rowenc.ParseDatumStringAsWithRawBytes(ctx, conv.VisibleColTypes[datumIdx], field, conv.EvalCtx)
261+
datum, err = mysqlStrToDatum(conv.EvalCtx, field, conv.VisibleColTypes[datumIdx])
267262
if err != nil {
268263
col := conv.VisibleCols[datumIdx]
269264
return newImportRowError(

pkg/sql/rowenc/roundtrip_format.go

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -35,22 +35,6 @@ func ParseDatumStringAs(
3535
}
3636
}
3737

38-
// ParseDatumStringAsWithRawBytes parses s as type t. However, if the requested type is Bytes
39-
// then the string is returned unchanged. This function is used when the input string might be
40-
// unescaped raw bytes, so we don't want to run a bytes parsing routine on the input. Other
41-
// than the bytes case, this function does the same as ParseDatumStringAs but is not
42-
// guaranteed to round-trip.
43-
func ParseDatumStringAsWithRawBytes(
44-
ctx context.Context, t *types.T, s string, evalCtx *eval.Context,
45-
) (tree.Datum, error) {
46-
switch t.Family() {
47-
case types.BytesFamily:
48-
return tree.NewDBytes(tree.DBytes(s)), nil
49-
default:
50-
return ParseDatumStringAs(ctx, t, s, evalCtx)
51-
}
52-
}
53-
5438
func parseAsTyp(
5539
ctx context.Context, evalCtx *eval.Context, typ *types.T, s string,
5640
) (tree.Datum, error) {

0 commit comments

Comments
 (0)