Skip to content

Commit 3315174

Browse files
Use Enum16 when more than 127 values (bakwc#117)
1 parent 1a56ff3 commit 3315174

File tree

2 files changed: 88 additions and 11 deletions.

2 files changed

+88
-11
lines changed

mysql_ch_replicator/converter.py

Lines changed: 87 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -284,8 +284,12 @@ def convert_type(self, mysql_type, parameters):
284284
for idx, value_name in enumerate(enum_values):
285285
ch_enum_values.append(f"'{value_name}' = {idx+1}")
286286
ch_enum_values = ', '.join(ch_enum_values)
287-
# Enum8('red' = 1, 'green' = 2, 'black' = 3)
288-
return f'Enum8({ch_enum_values})'
287+
if len(enum_values) <= 127:
288+
# Enum8('red' = 1, 'green' = 2, 'black' = 3)
289+
return f'Enum8({ch_enum_values})'
290+
else:
291+
# Enum16('red' = 1, 'green' = 2, 'black' = 3)
292+
return f'Enum16({ch_enum_values})'
289293
if 'text' in mysql_type:
290294
return 'String'
291295
if 'blob' in mysql_type:
@@ -550,7 +554,7 @@ def _tokenize_alter_query(cls, sql_line):
550554
# The first token is always the column name.
551555
column_name = tokens[0]
552556

553-
# Now merge tokens after the column name that belong to the type.
557+
# Now "merge" tokens after the column name that belong to the type.
554558
# (For many types the type is written as a single token already –
555559
# e.g. "VARCHAR(254)" or "NUMERIC(5, 2)", but for types like
556560
# "DOUBLE PRECISION" or "INT UNSIGNED" the .split() would produce two tokens.)
@@ -829,17 +833,90 @@ def parse_mysql_table_structure(self, create_statement, required_table_name=None
829833
if line.startswith('`'):
830834
end_pos = line.find('`', 1)
831835
field_name = line[1:end_pos]
832-
line = line[end_pos+1:].strip()
833-
definition = line.split(' ')
836+
line = line[end_pos + 1 :].strip()
837+
# Don't split by space for enum and set types that might contain spaces
838+
if line.lower().startswith('enum(') or line.lower().startswith('set('):
839+
# Find the end of the enum/set definition (closing parenthesis)
840+
open_parens = 0
841+
in_quotes = False
842+
quote_char = None
843+
end_pos = -1
844+
845+
for i, char in enumerate(line):
846+
if char in "'\"" and (i == 0 or line[i - 1] != "\\"):
847+
if not in_quotes:
848+
in_quotes = True
849+
quote_char = char
850+
elif char == quote_char:
851+
in_quotes = False
852+
elif char == '(' and not in_quotes:
853+
open_parens += 1
854+
elif char == ')' and not in_quotes:
855+
open_parens -= 1
856+
if open_parens == 0:
857+
end_pos = i + 1
858+
break
859+
860+
if end_pos > 0:
861+
field_type = line[:end_pos]
862+
field_parameters = line[end_pos:].strip()
863+
else:
864+
# Fallback to original behavior if we can't find the end
865+
definition = line.split(' ')
866+
field_type = definition[0]
867+
field_parameters = (
868+
' '.join(definition[1:]) if len(definition) > 1 else ''
869+
)
870+
else:
871+
definition = line.split(' ')
872+
field_type = definition[0]
873+
field_parameters = (
874+
' '.join(definition[1:]) if len(definition) > 1 else ''
875+
)
834876
else:
835877
definition = line.split(' ')
836878
field_name = strip_sql_name(definition[0])
837879
definition = definition[1:]
838-
839-
field_type = definition[0]
840-
field_parameters = ''
841-
if len(definition) > 1:
842-
field_parameters = ' '.join(definition[1:])
880+
if definition and (
881+
definition[0].lower().startswith('enum(')
882+
or definition[0].lower().startswith('set(')
883+
):
884+
line = ' '.join(definition)
885+
# Find the end of the enum/set definition (closing parenthesis)
886+
open_parens = 0
887+
in_quotes = False
888+
quote_char = None
889+
end_pos = -1
890+
891+
for i, char in enumerate(line):
892+
if char in "'\"" and (i == 0 or line[i - 1] != "\\"):
893+
if not in_quotes:
894+
in_quotes = True
895+
quote_char = char
896+
elif char == quote_char:
897+
in_quotes = False
898+
elif char == '(' and not in_quotes:
899+
open_parens += 1
900+
elif char == ')' and not in_quotes:
901+
open_parens -= 1
902+
if open_parens == 0:
903+
end_pos = i + 1
904+
break
905+
906+
if end_pos > 0:
907+
field_type = line[:end_pos]
908+
field_parameters = line[end_pos:].strip()
909+
else:
910+
# Fallback to original behavior
911+
field_type = definition[0]
912+
field_parameters = (
913+
' '.join(definition[1:]) if len(definition) > 1 else ''
914+
)
915+
else:
916+
field_type = definition[0]
917+
field_parameters = (
918+
' '.join(definition[1:]) if len(definition) > 1 else ''
919+
)
843920

844921
additional_data = None
845922
if 'set(' in field_type.lower():

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "mysql-ch-replicator"
3-
version = "0.0.40"
3+
version = "0.0.70"
44
description = "Tool for replication of MySQL databases to ClickHouse"
55
authors = ["Filipp Ozinov <[email protected]>"]
66
license = "MIT"

0 commit comments

Comments
 (0)