@@ -196,6 +196,44 @@ def convert_type(self, mysql_type, parameters):
196196 if mysql_type == 'point' :
197197 return 'Tuple(x Float32, y Float32)'
198198
199+ # Correctly handle numeric types
200+ if mysql_type .startswith ('numeric' ):
201+ # Determine if parameters are specified via parentheses:
202+ if '(' in mysql_type and ')' in mysql_type :
203+ # Expecting a type definition like "numeric(precision, scale)"
204+ pattern = r"numeric\((\d+)\s*,\s*(\d+)\)"
205+ match = re .search (pattern , mysql_type )
206+ if not match :
207+ raise ValueError (f"Invalid numeric type definition: { mysql_type } " )
208+
209+ precision = int (match .group (1 ))
210+ scale = int (match .group (2 ))
211+ else :
212+ # If no parentheses are provided, assume defaults.
213+ precision = 10 # or other default as defined by your standards
214+ scale = 0
215+
216+ # If no fractional part, consider mapping to integer type (if desired)
217+ if scale == 0 :
218+ if is_unsigned :
219+ if precision <= 9 :
220+ return "UInt32"
221+ elif precision <= 18 :
222+ return "UInt64"
223+ else :
224+ # For very large precisions, fallback to Decimal
225+ return f"Decimal({ precision } , { scale } )"
226+ else :
227+ if precision <= 9 :
228+ return "Int32"
229+ elif precision <= 18 :
230+ return "Int64"
231+ else :
232+ return f"Decimal({ precision } , { scale } )"
233+ else :
234+ # For types with a defined fractional part, use a Decimal mapping.
235+ return f"Decimal({ precision } , { scale } )"
236+
199237 if mysql_type == 'int' :
200238 if is_unsigned :
201239 return 'UInt32'
@@ -472,7 +510,68 @@ def convert_alter_query(self, mysql_query, db_name):
472510
473511 raise Exception (f'operation { op_name } not implement, query: { subquery } ' )
474512
@classmethod
def _tokenize_alter_query(cls, sql_line):
    """Split a column definition into ``[name, type, *options]``.

    The text is first cut into lexical tokens, then every token between the
    column name and the first column-option keyword is merged into a single
    data-type string, so multi-word types such as ``DOUBLE PRECISION`` or
    ``INT UNSIGNED`` come back as one element.

    Args:
        sql_line: Column definition text, e.g.
            ``"price NUMERIC(5, 2) NOT NULL AFTER id"``.

    Returns:
        A list whose first element is the column name, whose second element
        is the merged data type (omitted when no type tokens precede the
        first option keyword), followed by the remaining tokens unchanged.
        An empty list is returned when ``sql_line`` contains no tokens.
    """
    # Alternatives are tried in order, so an identifier or word that is
    # immediately followed by "(...)" is captured together with its
    # argument list (e.g. "VARCHAR(254)", "NUMERIC(5, 2)").
    token_pattern = re.compile(r'''
        (                               # one token
            `[^`]+`(?:\([^)]*\))?   |   # backquoted identifier, optional (...)
            \w+(?:\([^)]*\))?       |   # plain word, optional (...)
            '(?:\\'|[^'])*'         |   # single-quoted string
            "(?:\\"|[^"])*"         |   # double-quoted string
            [^\s]+                      # fallback: any run of non-whitespace
        )
    ''', re.VERBOSE)
    tokens = token_pattern.findall(sql_line)
    if not tokens:
        return tokens

    # Keywords that end the data type and start the column options.
    # FIRST is MySQL's other placement keyword next to AFTER; without it a
    # definition like "col INT FIRST" would yield the type "INT FIRST".
    option_keywords = frozenset((
        "DEFAULT", "NOT", "NULL", "AUTO_INCREMENT", "PRIMARY", "UNIQUE",
        "COMMENT", "COLLATE", "REFERENCES", "ON", "CHECK", "CONSTRAINT",
        "AFTER", "BEFORE", "FIRST", "GENERATED", "VIRTUAL", "STORED",
    ))

    # The first token is always the column name.
    column_name = tokens[0]
    remainder = tokens[1:]

    # Position of the first option keyword (case-insensitive); when there is
    # none, everything after the column name belongs to the type.
    split_at = next(
        (pos for pos, tok in enumerate(remainder)
         if tok.upper() in option_keywords),
        len(remainder),
    )
    type_tokens = remainder[:split_at]
    option_tokens = remainder[split_at:]

    if type_tokens:
        return [column_name, " ".join(type_tokens)] + option_tokens
    return [column_name] + option_tokens
571+
475572 def __convert_alter_table_add_column (self , db_name , table_name , tokens ):
573+ tokens = self ._tokenize_alter_query (' ' .join (tokens ))
574+
476575 if len (tokens ) < 2 :
477576 raise Exception ('wrong tokens count' , tokens )
478577
0 commit comments