Skip to content

Commit fa8c09d

Browse files
Fixing bug in blob mapping (#1203)
1 parent 11c8739 commit fa8c09d

File tree

4 files changed

+41
-39
lines changed

4 files changed

+41
-39
lines changed

docs/data-types/mysql.md

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -24,35 +24,35 @@ There are also nuances to handling certain specific data types. These are captur
2424

2525
The Spanner migration tool maps MySQL types to Spanner types as follows:
2626

27-
| **MySQL Type** | **Spanner Type** | **Notes** |
28-
|:-------------------------------------------------:|:--------------------------:|:--------------------------------------------------------:|
29-
| `BOOL`, `BOOLEAN`,<br/>`TINYINT(1)` | `BOOL` | |
30-
| `BIGINT` | `INT64` | |
31-
| `BINARY`, `VARBINARY` | `BYTES(MAX)` | |
32-
| `BLOB` | `BYTES(65535)` | |
33-
| `BLOB(N)` | `BYTES(N)` | |
34-
| `MEDIUMBLOB` | `BYTES(16777215)` | |
35-
| `MEDIUMBLOB(N)` | `BYTES(N)` | |
36-
| `TINYBLOB` | `BYTES(255)` | |
37-
| `TINYBLOB(N)` | `BYTES(N)` | |
38-
| `LONGBLOB` | `BYTES(4294967295)` | |
39-
| `LONGBLOB(N)` | `BYTES(N)` | |
40-
| `BIT` | `BYTES(MAX)` | BIT(1) converts to BOOL, other cases map to BYTES |
41-
| `CHAR` | `STRING(1)` | CHAR defaults to length 1 |
42-
| `CHAR(N)` | `STRING(N)` | differences in treatment of fixed-length character types |
43-
| `DATE` | `DATE` | |
44-
| `DATETIME` | `TIMESTAMP` | differences in treatment of timezones |
45-
| `DECIMAL`, `NUMERIC` | `NUMERIC` | potential changes of precision |
46-
| `DOUBLE` | `FLOAT64` | |
47-
| `ENUM` | `STRING(MAX)` | |
48-
| `FLOAT` | `FLOAT32` | |
49-
| `INTEGER`, `MEDIUMINT`,<br/>`TINYINT`, `SMALLINT` | `INT64` | changes in storage size |
50-
| `JSON` | `JSON` | |
51-
| `SET` | `ARRAY<STRING>` | SET only supports string values |
52-
| `TEXT`, `MEDIUMTEXT`,<br/>`TINYTEXT`, `LONGTEXT` | `STRING(MAX)` | |
53-
| `TIMESTAMP` | `TIMESTAMP` | |
54-
| `VARCHAR` | `STRING(MAX)` | |
55-
| `VARCHAR(N)` | `STRING(N)` | differences in treatment of fixed-length character types |
27+
| **MySQL Type** | **Spanner Type** | **Notes** |
28+
|:-------------------------------------------------:|:-----------------:|:--------------------------------------------------------:|
29+
| `BOOL`, `BOOLEAN`,<br/>`TINYINT(1)` | `BOOL` | |
30+
| `BIGINT` | `INT64` | |
31+
| `BINARY`, `VARBINARY` | `BYTES(MAX)` | |
32+
| `BLOB` | `BYTES(65535)` | |
33+
| `BLOB(N)` | `BYTES(N)` | |
34+
| `MEDIUMBLOB` | `BYTES(10485760)` | |
35+
| `MEDIUMBLOB(N)` | `BYTES(N)` | |
36+
| `TINYBLOB` | `BYTES(255)` | |
37+
| `TINYBLOB(N)` | `BYTES(N)` | |
38+
| `LONGBLOB` | `BYTES(10485760)` | |
39+
| `LONGBLOB(N)` | `BYTES(N)` | |
40+
| `BIT` | `BYTES(MAX)` | BIT(1) converts to BOOL, other cases map to BYTES |
41+
| `CHAR` | `STRING(1)` | CHAR defaults to length 1 |
42+
| `CHAR(N)` | `STRING(N)` | differences in treatment of fixed-length character types |
43+
| `DATE` | `DATE` | |
44+
| `DATETIME` | `TIMESTAMP` | differences in treatment of timezones |
45+
| `DECIMAL`, `NUMERIC` | `NUMERIC` | potential changes of precision |
46+
| `DOUBLE` | `FLOAT64` | |
47+
| `ENUM` | `STRING(MAX)` | |
48+
| `FLOAT` | `FLOAT32` | |
49+
| `INTEGER`, `MEDIUMINT`,<br/>`TINYINT`, `SMALLINT` | `INT64` | changes in storage size |
50+
| `JSON` | `JSON` | |
51+
| `SET` | `ARRAY<STRING>` | SET only supports string values |
52+
| `TEXT`, `MEDIUMTEXT`,<br/>`TINYTEXT`, `LONGTEXT` | `STRING(MAX)` | |
53+
| `TIMESTAMP` | `TIMESTAMP` | |
54+
| `VARCHAR` | `STRING(MAX)` | |
55+
| `VARCHAR(N)` | `STRING(N)` | differences in treatment of fixed-length character types |
5656

5757

5858
Spanner does not support `spatial` datatypes of MySQL. Along with `spatial`

sources/mysql/mysqldump_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,9 @@ func TestProcessMySQLDump_Scalar(t *testing.T) {
4848
{"tinyint(4)", ddl.Type{Name: ddl.Int64}},
4949
{"json", ddl.Type{Name: ddl.JSON}},
5050
{"blob", ddl.Type{Name: ddl.Bytes, Len: int64(65535)}},
51-
{"mediumblob", ddl.Type{Name: ddl.Bytes, Len: int64(16_777_215)}},
51+
{"mediumblob", ddl.Type{Name: ddl.Bytes, Len: int64(10_485_760)}},
5252
{"tinyblob", ddl.Type{Name: ddl.Bytes, Len: int64(255)}},
53-
{"longblob", ddl.Type{Name: ddl.Bytes, Len: int64(4_294_967_295)}},
53+
{"longblob", ddl.Type{Name: ddl.Bytes, Len: int64(10_485_760)}},
5454
{"char(42)", ddl.Type{Name: ddl.String, Len: int64(42)}},
5555
{"date", ddl.Type{Name: ddl.Date}},
5656
{"decimal(4,10)", ddl.Type{Name: ddl.Numeric}},

sources/mysql/toddl.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,14 @@ var maxMysqlSizesMap = map[string]int64{
4040
"LONGBLOB": 1<<32 - 1, // 4,294,967,295 bytes
4141
}
4242

43+
const maxLengthPerCell = 10_485_760
44+
4345
func getMaxSize(srcType string) int64 {
4446
value, found := maxMysqlSizesMap[strings.ToUpper(srcType)]
4547
if !found {
4648
value = ddl.MaxLength
4749
}
48-
return min(value, ddl.MaxLength)
50+
return min(value, maxLengthPerCell)
4951
}
5052

5153
// ToSpannerType maps a scalar source schema type (defined by id and
@@ -234,7 +236,7 @@ func toSpannerTypeInternal(srcType schema.Type, spType string) (ddl.Type, []inte
234236
return ddl.Type{Name: ddl.String, Len: ddl.MaxLength}, nil
235237
default:
236238
if len(srcType.Mods) > 0 {
237-
return ddl.Type{Name: ddl.Bytes, Len: srcType.Mods[0]}, nil
239+
return ddl.Type{Name: ddl.Bytes, Len: min(srcType.Mods[0], maxLengthPerCell)}, nil
238240
}
239241
return ddl.Type{Name: ddl.Bytes, Len: getMaxSize(srcType.Name)}, nil
240242
}

sources/mysql/toddl_test.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ func TestToSpannerTypeInternal(t *testing.T) {
195195
t.Errorf("Error in mediumBlob to byte conversion")
196196
}
197197
assert.Equal(t, "BYTES", mediumBlobToBytesWithoutMods.Name)
198-
assert.Equal(t, int64(16_777_215), mediumBlobToBytesWithoutMods.Len)
198+
assert.Equal(t, int64(10_485_760), mediumBlobToBytesWithoutMods.Len)
199199

200200
longBlobToBytes, errCheck := toSpannerTypeInternal(schema.Type{"longblob", []int64{42}, []int64{1, 2, 3}}, "BYTES")
201201
if errCheck != nil {
@@ -208,7 +208,7 @@ func TestToSpannerTypeInternal(t *testing.T) {
208208
t.Errorf("Error in longBlob to byte conversion")
209209
}
210210
assert.Equal(t, "BYTES", longBlobToBytesWithoutMods.Name)
211-
assert.Equal(t, int64(4_294_967_295), longBlobToBytesWithoutMods.Len)
211+
assert.Equal(t, int64(10_485_760), longBlobToBytesWithoutMods.Len)
212212
}
213213

214214
// This is just a very basic smoke-test for toSpannerType.
@@ -504,22 +504,22 @@ func TestGetMaxSize(t *testing.T) {
504504
{
505505
name: "mediumblob size is capped by MaxLength",
506506
mysqlType: "mediumblob",
507-
want: 16_777_215, // Expected: MaxLength, since 16,777,215 > MaxLength
507+
want: 10_485_760, // Expected: maxLengthPerCell, since 16,777,215 > 10,485,760
508508
},
509509
{
510510
name: "longblob size is capped by MaxLength",
511511
mysqlType: "longblob",
512-
want: 4_294_967_295, // Expected: MaxLength, since 4,294,967,295 > MaxLength
512+
want: 10_485_760, // Expected: maxLengthPerCell, since 4,294,967,295 > 10,485,760
513513
},
514514
{
515515
name: "unmapped type returns MaxLength",
516516
mysqlType: "varchar", // A type not present in our map.
517-
want: ddl.MaxLength,
517+
want: 10_485_760,
518518
},
519519
{
520520
name: "empty string type returns MaxLength",
521521
mysqlType: "", // Edge case: empty input string.
522-
want: ddl.MaxLength,
522+
want: 10_485_760,
523523
},
524524
}
525525

0 commit comments

Comments
 (0)