@@ -228,11 +228,13 @@ def convert_records(self, mysql_records, mysql_structure: TableStructure, clickh
228228
229229 clickhouse_records = []
230230 for mysql_record in mysql_records :
231- clickhouse_record = self .convert_record (mysql_record , mysql_field_types , clickhouse_filed_types )
231+ clickhouse_record = self .convert_record (
232+ mysql_record , mysql_field_types , clickhouse_filed_types , mysql_structure ,
233+ )
232234 clickhouse_records .append (clickhouse_record )
233235 return clickhouse_records
234236
235- def convert_record (self , mysql_record , mysql_field_types , clickhouse_field_types ):
237+ def convert_record (self , mysql_record , mysql_field_types , clickhouse_field_types , mysql_structure : TableStructure ):
236238 clickhouse_record = []
237239 for idx , mysql_field_value in enumerate (mysql_record ):
238240 clickhouse_field_value = mysql_field_value
@@ -256,6 +258,13 @@ def convert_record(self, mysql_record, mysql_field_types, clickhouse_field_types
256258 if 'UInt64' in clickhouse_field_type and clickhouse_field_value < 0 :
257259 clickhouse_field_value = 18446744073709551616 + clickhouse_field_value
258260
261+ if 'String' in clickhouse_field_type and (
262+ 'text' in mysql_field_type or 'char' in mysql_field_type
263+ ):
264+ if isinstance (clickhouse_field_value , bytes ):
265+ charset = mysql_structure .charset or 'utf-8'
266+ clickhouse_field_value = clickhouse_field_value .decode (charset )
267+
259268 if 'point' in mysql_field_type :
260269 clickhouse_field_value = parse_mysql_point (clickhouse_field_value )
261270
@@ -513,6 +522,18 @@ def parse_mysql_table_structure(self, create_statement, required_table_name=None
513522 inner_tokens = '' .join ([str (t ) for t in inner_tokens [1 :- 1 ]]).strip ()
514523 inner_tokens = split_high_level (inner_tokens , ',' )
515524
525+ prev_token = ''
526+ prev_prev_token = ''
527+ for line in tokens [4 :]:
528+ curr_token = line .value
529+ if prev_token == '=' and prev_prev_token .lower () == 'charset' :
530+ structure .charset = curr_token
531+ prev_prev_token = prev_token
532+ prev_token = curr_token
533+
534+ if structure .charset .startswith ('utf8' ):
535+ structure .charset = 'utf-8'
536+
516537 for line in inner_tokens :
517538 if line .lower ().startswith ('unique key' ):
518539 continue
0 commit comments