@@ -222,19 +222,31 @@ def convert_table_structure(self, mysql_structure: TableStructure) -> TableStruc
222222 clickhouse_structure .preprocess ()
223223 return clickhouse_structure
224224
def convert_records(
    self, mysql_records, mysql_structure: 'TableStructure', clickhouse_structure: 'TableStructure',
    only_primary: bool = False,
):
    """Convert a batch of MySQL records into ClickHouse records.

    The field types of both structures are collected once, then every
    record is passed through ``self.convert_record``.

    Args:
        mysql_records: iterable of MySQL rows; each row is a sequence of
            field values.
        mysql_structure: source table structure. Its ``fields`` supply the
            MySQL field types, and the structure itself is forwarded to
            ``convert_record``.
        clickhouse_structure: target table structure. Its ``fields`` supply
            the ClickHouse field types.
        only_primary: forwarded unchanged to ``convert_record``, which
            copies fields outside ``mysql_structure.primary_key_ids``
            through without conversion when this flag is true.

    Returns:
        A list holding one converted record per input record, in input
        order.
    """
    # Resolve the per-column types once instead of once per record.
    mysql_field_types = [field.field_type for field in mysql_structure.fields]
    clickhouse_field_types = [field.field_type for field in clickhouse_structure.fields]

    return [
        self.convert_record(
            mysql_record, mysql_field_types, clickhouse_field_types, mysql_structure, only_primary,
        )
        for mysql_record in mysql_records
    ]
234239
235- def convert_record (self , mysql_record , mysql_field_types , clickhouse_field_types ):
240+ def convert_record (
241+ self , mysql_record , mysql_field_types , clickhouse_field_types , mysql_structure : TableStructure ,
242+ only_primary : bool ,
243+ ):
236244 clickhouse_record = []
237245 for idx , mysql_field_value in enumerate (mysql_record ):
246+ if only_primary and idx not in mysql_structure .primary_key_ids :
247+ clickhouse_record .append (mysql_field_value )
248+ continue
249+
238250 clickhouse_field_value = mysql_field_value
239251 mysql_field_type = mysql_field_types [idx ]
240252 clickhouse_field_type = clickhouse_field_types [idx ]
@@ -256,6 +268,13 @@ def convert_record(self, mysql_record, mysql_field_types, clickhouse_field_types
256268 if 'UInt64' in clickhouse_field_type and clickhouse_field_value < 0 :
257269 clickhouse_field_value = 18446744073709551616 + clickhouse_field_value
258270
271+ if 'String' in clickhouse_field_type and (
272+ 'text' in mysql_field_type or 'char' in mysql_field_type
273+ ):
274+ if isinstance (clickhouse_field_value , bytes ):
275+ charset = mysql_structure .charset or 'utf-8'
276+ clickhouse_field_value = clickhouse_field_value .decode (charset )
277+
259278 if 'point' in mysql_field_type :
260279 clickhouse_field_value = parse_mysql_point (clickhouse_field_value )
261280
@@ -513,6 +532,18 @@ def parse_mysql_table_structure(self, create_statement, required_table_name=None
513532 inner_tokens = '' .join ([str (t ) for t in inner_tokens [1 :- 1 ]]).strip ()
514533 inner_tokens = split_high_level (inner_tokens , ',' )
515534
535+ prev_token = ''
536+ prev_prev_token = ''
537+ for line in tokens [4 :]:
538+ curr_token = line .value
539+ if prev_token == '=' and prev_prev_token .lower () == 'charset' :
540+ structure .charset = curr_token
541+ prev_prev_token = prev_token
542+ prev_token = curr_token
543+
544+ if structure .charset .startswith ('utf8' ):
545+ structure .charset = 'utf-8'
546+
516547 for line in inner_tokens :
517548 if line .lower ().startswith ('unique key' ):
518549 continue
0 commit comments