|
6 | 6 | from pyparsing import Suppress, CaselessKeyword, Word, alphas, alphanums, delimitedList |
7 | 7 |
|
8 | 8 | from .table_structure import TableStructure, TableField |
9 | | -from .converter_enum_parser import parse_mysql_enum |
| 9 | +from .enum import ( |
| 10 | + parse_mysql_enum, EnumConverter, |
| 11 | + parse_enum_or_set_field, |
| 12 | + extract_enum_or_set_values |
| 13 | +) |
10 | 14 |
|
11 | 15 |
|
12 | 16 | CHARSET_MYSQL_TO_PYTHON = { |
@@ -282,7 +286,7 @@ def convert_type(self, mysql_type, parameters): |
282 | 286 | enum_values = parse_mysql_enum(mysql_type) |
283 | 287 | ch_enum_values = [] |
284 | 288 | for idx, value_name in enumerate(enum_values): |
285 | | - ch_enum_values.append(f"'{value_name}' = {idx+1}") |
| 289 | + ch_enum_values.append(f"'{value_name.lower()}' = {idx+1}") |
286 | 290 | ch_enum_values = ', '.join(ch_enum_values) |
287 | 291 | if len(enum_values) <= 127: |
288 | 292 | # Enum8('red' = 1, 'green' = 2, 'black' = 3) |
@@ -428,9 +432,15 @@ def convert_record( |
428 | 432 | if mysql_field_type.startswith('point'): |
429 | 433 | clickhouse_field_value = parse_mysql_point(clickhouse_field_value) |
430 | 434 |
|
431 | | - if mysql_field_type.startswith('enum(') and isinstance(clickhouse_field_value, int): |
| 435 | + if mysql_field_type.startswith('enum('): |
432 | 436 | enum_values = mysql_structure.fields[idx].additional_data |
433 | | - clickhouse_field_value = enum_values[int(clickhouse_field_value)-1] |
| 437 | + field_name = mysql_structure.fields[idx].name if idx < len(mysql_structure.fields) else "unknown" |
| 438 | + |
| 439 | + clickhouse_field_value = EnumConverter.convert_mysql_to_clickhouse_enum( |
| 440 | + clickhouse_field_value, |
| 441 | + enum_values, |
| 442 | + field_name |
| 443 | + ) |
434 | 444 |
|
435 | 445 | clickhouse_record.append(clickhouse_field_value) |
436 | 446 | return tuple(clickhouse_record) |
@@ -834,107 +844,16 @@ def parse_mysql_table_structure(self, create_statement, required_table_name=None |
834 | 844 | end_pos = line.find('`', 1) |
835 | 845 | field_name = line[1:end_pos] |
836 | 846 | line = line[end_pos + 1 :].strip() |
837 | | - # Don't split by space for enum and set types that might contain spaces |
838 | | - if line.lower().startswith('enum(') or line.lower().startswith('set('): |
839 | | - # Find the end of the enum/set definition (closing parenthesis) |
840 | | - open_parens = 0 |
841 | | - in_quotes = False |
842 | | - quote_char = None |
843 | | - end_pos = -1 |
844 | | - |
845 | | - for i, char in enumerate(line): |
846 | | - if char in "'\"" and (i == 0 or line[i - 1] != "\\"): |
847 | | - if not in_quotes: |
848 | | - in_quotes = True |
849 | | - quote_char = char |
850 | | - elif char == quote_char: |
851 | | - in_quotes = False |
852 | | - elif char == '(' and not in_quotes: |
853 | | - open_parens += 1 |
854 | | - elif char == ')' and not in_quotes: |
855 | | - open_parens -= 1 |
856 | | - if open_parens == 0: |
857 | | - end_pos = i + 1 |
858 | | - break |
859 | | - |
860 | | - if end_pos > 0: |
861 | | - field_type = line[:end_pos] |
862 | | - field_parameters = line[end_pos:].strip() |
863 | | - else: |
864 | | - # Fallback to original behavior if we can't find the end |
865 | | - definition = line.split(' ') |
866 | | - field_type = definition[0] |
867 | | - field_parameters = ( |
868 | | - ' '.join(definition[1:]) if len(definition) > 1 else '' |
869 | | - ) |
870 | | - else: |
871 | | - definition = line.split(' ') |
872 | | - field_type = definition[0] |
873 | | - field_parameters = ( |
874 | | - ' '.join(definition[1:]) if len(definition) > 1 else '' |
875 | | - ) |
| 847 | + # Use our new enum parsing utilities |
| 848 | + field_name, field_type, field_parameters = parse_enum_or_set_field(line, field_name, is_backtick_quoted=True) |
876 | 849 | else: |
877 | 850 | definition = line.split(' ') |
878 | 851 | field_name = strip_sql_name(definition[0]) |
879 | | - definition = definition[1:] |
880 | | - if definition and ( |
881 | | - definition[0].lower().startswith('enum(') |
882 | | - or definition[0].lower().startswith('set(') |
883 | | - ): |
884 | | - line = ' '.join(definition) |
885 | | - # Find the end of the enum/set definition (closing parenthesis) |
886 | | - open_parens = 0 |
887 | | - in_quotes = False |
888 | | - quote_char = None |
889 | | - end_pos = -1 |
890 | | - |
891 | | - for i, char in enumerate(line): |
892 | | - if char in "'\"" and (i == 0 or line[i - 1] != "\\"): |
893 | | - if not in_quotes: |
894 | | - in_quotes = True |
895 | | - quote_char = char |
896 | | - elif char == quote_char: |
897 | | - in_quotes = False |
898 | | - elif char == '(' and not in_quotes: |
899 | | - open_parens += 1 |
900 | | - elif char == ')' and not in_quotes: |
901 | | - open_parens -= 1 |
902 | | - if open_parens == 0: |
903 | | - end_pos = i + 1 |
904 | | - break |
905 | | - |
906 | | - if end_pos > 0: |
907 | | - field_type = line[:end_pos] |
908 | | - field_parameters = line[end_pos:].strip() |
909 | | - else: |
910 | | - # Fallback to original behavior |
911 | | - field_type = definition[0] |
912 | | - field_parameters = ( |
913 | | - ' '.join(definition[1:]) if len(definition) > 1 else '' |
914 | | - ) |
915 | | - else: |
916 | | - field_type = definition[0] |
917 | | - field_parameters = ( |
918 | | - ' '.join(definition[1:]) if len(definition) > 1 else '' |
919 | | - ) |
920 | | - |
921 | | - additional_data = None |
922 | | - if 'set(' in field_type.lower(): |
923 | | - vals = field_type[len('set('):] |
924 | | - close_pos = vals.find(')') |
925 | | - vals = vals[:close_pos] |
926 | | - vals = vals.split(',') |
927 | | - def vstrip(e): |
928 | | - if not e: |
929 | | - return e |
930 | | - if e[0] in '"\'': |
931 | | - return e[1:-1] |
932 | | - return e |
933 | | - vals = [vstrip(v) for v in vals] |
934 | | - additional_data = vals |
935 | | - |
936 | | - if field_type.lower().startswith('enum('): |
937 | | - additional_data = parse_mysql_enum(field_type) |
| 852 | + # Use our new enum parsing utilities |
| 853 | + field_name, field_type, field_parameters = parse_enum_or_set_field(line, field_name, is_backtick_quoted=False) |
| 854 | + |
| 855 | + # Extract additional data for enum and set types |
| 856 | + additional_data = extract_enum_or_set_values(field_type, from_parser_func=parse_mysql_enum) |
938 | 857 |
|
939 | 858 | structure.fields.append(TableField( |
940 | 859 | name=field_name, |
|
0 commit comments