@@ -492,13 +492,13 @@ def test_multi_column_erase():
492492
493493 prepare_env (cfg , mysql , ch )
494494
495- mysql .execute (f'''
495+ mysql .execute (f"""
496496CREATE TABLE `{ TEST_TABLE_NAME } ` (
497- departments int(11) NOT NULL,
498- termine int(11) NOT NULL,
497+ departments int(11) NOT NULL COMMENT '事件类型,可选值: ''SYSTEM'', ''BUSINESS''' ,
498+ termine int(11) NOT NULL COMMENT '事件类型,可选值: ''SYSTEM'', ''BUSINESS''' ,
499499 PRIMARY KEY (departments,termine)
500500)
501- ''' )
501+ """ )
502502
503503
504504 mysql .execute (f"INSERT INTO `{ TEST_TABLE_NAME } ` (departments, termine) VALUES (10, 20);" , commit = True )
@@ -3082,3 +3082,143 @@ def test_resume_initial_replication_with_ignore_deletes():
30823082 finally :
30833083 # Clean up temp config file
30843084 os .unlink (config_file )
3085+
3086+
@pytest.mark.parametrize("input_sql,expected_output", [
    # Basic single quote comment
    (
        "CREATE TABLE test (id int NOT NULL COMMENT 'Simple comment', name varchar(255))",
        "CREATE TABLE test (id int NOT NULL , name varchar(255))",
    ),
    # Basic double quote comment
    (
        "CREATE TABLE test (id int NOT NULL COMMENT \" Simple comment\" , name varchar(255))",
        "CREATE TABLE test (id int NOT NULL , name varchar(255))",
    ),
    # Comment with escaped single quotes (the original bug case)
    (
        "CREATE TABLE test (id int NOT NULL COMMENT '事件类型,可选值: ''SYSTEM'', ''BUSINESS''', name varchar(255))",
        "CREATE TABLE test (id int NOT NULL , name varchar(255))",
    ),
    # Comment with escaped double quotes. The doubled quotes must be adjacent
    # ("" with nothing in between): that is what makes them an escape inside
    # one quoted string rather than several separate strings.
    (
        "CREATE TABLE test (id int NOT NULL COMMENT \"Value can be: \"\"ACTIVE\"\" or \"\"INACTIVE\"\"\", name varchar(255))",
        "CREATE TABLE test (id int NOT NULL , name varchar(255))",
    ),
    # Multiple comments in same table
    (
        """CREATE TABLE test (
            id int NOT NULL COMMENT 'Primary key',
            name varchar(255) COMMENT 'User name',
            status enum('active','inactive') COMMENT 'Status with ''quotes'''
        )""",
        """CREATE TABLE test (
            id int NOT NULL ,
            name varchar(255) ,
            status enum('active','inactive')
        )""",
    ),
    # Comment with COMMENT = syntax
    (
        "CREATE TABLE test (id int NOT NULL COMMENT = 'Primary key', name varchar(255))",
        "CREATE TABLE test (id int NOT NULL , name varchar(255))",
    ),
    # Comment with mixed quotes and special characters
    (
        "CREATE TABLE test (id int COMMENT 'Mixed: ''single'', \" double\" , and `backtick`', name text)",
        "CREATE TABLE test (id int , name text)",
    ),
    # Multiline comment
    (
        """CREATE TABLE test (
            id int NOT NULL COMMENT 'This is a
            multiline comment
            with newlines',
            name varchar(255)
        )""",
        """CREATE TABLE test (
            id int NOT NULL ,
            name varchar(255)
        )""",
    ),
    # Comment with Unicode characters
    (
        "CREATE TABLE test (id int COMMENT '用户ID - 主键', name varchar(255) COMMENT 'Имя пользователя')",
        "CREATE TABLE test (id int , name varchar(255) )",
    ),
    # No comments (should remain unchanged)
    (
        "CREATE TABLE test (id int NOT NULL, name varchar(255))",
        "CREATE TABLE test (id int NOT NULL, name varchar(255))",
    ),
    # Comment at table level
    (
        "CREATE TABLE test (id int NOT NULL, name varchar(255)) COMMENT 'Table comment'",
        "CREATE TABLE test (id int NOT NULL, name varchar(255)) ",
    ),
    # Complex case with multiple escaped quotes and special characters
    (
        """CREATE TABLE test (
            departments int(11) NOT NULL COMMENT '事件类型,可选值: ''SYSTEM'', ''BUSINESS''',
            termine int(11) NOT NULL COMMENT '事件类型,可选值: ''SYSTEM'', ''BUSINESS''',
            PRIMARY KEY (departments,termine)
        )""",
        """CREATE TABLE test (
            departments int(11) NOT NULL ,
            termine int(11) NOT NULL ,
            PRIMARY KEY (departments,termine)
        )""",
    ),
    # Comment with JSON-like content
    (
        "CREATE TABLE test (config json COMMENT '{\" type\" : \" config\" , \" values\" : [\" a\" , \" b\" ]}', id int)",
        "CREATE TABLE test (config json , id int)",
    ),
    # Comment with SQL injection-like content (should be safely handled)
    (
        "CREATE TABLE test (id int COMMENT 'DROP TABLE users; --', name varchar(255))",
        "CREATE TABLE test (id int , name varchar(255))",
    ),
    # Empty comment
    (
        "CREATE TABLE test (id int COMMENT '', name varchar(255))",
        "CREATE TABLE test (id int , name varchar(255))",
    ),
    # Comment with only spaces
    (
        "CREATE TABLE test (id int COMMENT ' ', name varchar(255))",
        "CREATE TABLE test (id int , name varchar(255))",
    ),
    # Case insensitive COMMENT keyword
    (
        "CREATE TABLE test (id int comment 'lowercase', name varchar(255) Comment 'Mixed case')",
        "CREATE TABLE test (id int , name varchar(255) )",
    ),
])
def test_strip_comments_function(input_sql, expected_output):
    """
    Check that ``_strip_comments`` removes COMMENT clauses from CREATE TABLE SQL.

    Each parametrized case pairs an input statement with the statement that
    should remain once every COMMENT clause has been stripped. The cases cover:
    - Basic single and double quoted comments
    - Escaped quotes within comments (MySQL style with doubled quotes)
    - Multiple comments in the same table
    - COMMENT = syntax
    - Multiline comments with newlines
    - Unicode characters in comments
    - Table-level comments
    - Complex real-world scenarios
    - Edge cases like empty comments and case variations

    The comparison is whitespace-tolerant: runs of spaces/tabs are collapsed,
    whitespace touching a line break is ignored, and both ends are stripped.
    Newlines themselves are preserved, so line structure is still compared.
    """
    import re

    from mysql_ch_replicator.converter import MysqlToClickhouseConverter

    converter = MysqlToClickhouseConverter()
    result = converter._strip_comments(input_sql)

    def normalize_whitespace(text):
        # Removing a COMMENT clause leaves behind whatever blanks surrounded
        # it, so collapse every run of spaces/tabs to a single space first.
        collapsed = re.sub(r'[ \t]+', ' ', text)
        # A clause removed at the end of a line leaves a dangling blank before
        # the newline, and indentation is irrelevant to the check; drop the
        # (at most one, after collapsing) space on either side of a newline.
        return re.sub(r' ?\n ?', '\n', collapsed).strip()

    assert normalize_whitespace(result) == normalize_whitespace(expected_output), f"Failed for input: {input_sql} "
0 commit comments