@@ -1078,46 +1078,121 @@ def _strip_comments(self, create_statement):
10781078 """
10791079 Strip COMMENT clauses from CREATE TABLE statements.
10801080 Handles MySQL-style quote escaping where quotes are doubled ('' or "").
1081+
1082+ This function properly parses SQL syntax to distinguish between:
1083+ - COMMENT clauses (which should be removed)
1084+ - String literals containing "COMMENT" (which should be preserved)
1085+ - Identifiers containing "comment" (which should be preserved)
10811086 """
10821087 result = []
10831088 i = 0
1089+
10841090 while i < len (create_statement ):
1085- # Look for COMMENT keyword (case insensitive)
1086- if (i + 7 < len (create_statement ) and
1091+ char = create_statement [i ]
1092+
1093+ # Handle string literals (single quotes)
1094+ if char == "'" :
1095+ result .append (char )
1096+ i += 1
1097+ # Copy the entire string literal, handling escaped quotes
1098+ while i < len (create_statement ):
1099+ char = create_statement [i ]
1100+ result .append (char )
1101+ if char == "'" :
1102+ # Check if this is an escaped quote (doubled)
1103+ if i + 1 < len (create_statement ) and create_statement [i + 1 ] == "'" :
1104+ i += 1 # Skip to the second quote
1105+ result .append (create_statement [i ]) # Add the second quote
1106+ else :
1107+ i += 1 # End of string literal
1108+ break
1109+ i += 1
1110+ continue
1111+
1112+ # Handle string literals (double quotes)
1113+ if char == '"' :
1114+ result .append (char )
1115+ i += 1
1116+ # Copy the entire string literal, handling escaped quotes
1117+ while i < len (create_statement ):
1118+ char = create_statement [i ]
1119+ result .append (char )
1120+ if char == '"' :
1121+ # Check if this is an escaped quote (doubled)
1122+ if i + 1 < len (create_statement ) and create_statement [i + 1 ] == '"' :
1123+ i += 1 # Skip to the second quote
1124+ result .append (create_statement [i ]) # Add the second quote
1125+ else :
1126+ i += 1 # End of string literal
1127+ break
1128+ i += 1
1129+ continue
1130+
1131+ # Handle backtick-quoted identifiers
1132+ if char == '`' :
1133+ result .append (char )
1134+ i += 1
1135+ # Copy the entire identifier
1136+ while i < len (create_statement ):
1137+ char = create_statement [i ]
1138+ result .append (char )
1139+ if char == '`' :
1140+ i += 1 # End of identifier
1141+ break
1142+ i += 1
1143+ continue
1144+
1145+ # Look for COMMENT keyword (case insensitive) outside of quotes
1146+ if (i + 7 <= len (create_statement ) and
10871147 create_statement [i :i + 7 ].upper () == 'COMMENT' and
1088- (i == 0 or ( not create_statement [i - 1 ].isalnum () and create_statement [ i - 1 ] != '`' )) and
1148+ (i == 0 or not create_statement [i - 1 ].isalnum ()) and
10891149 (i + 7 >= len (create_statement ) or not create_statement [i + 7 ].isalnum ())):
10901150
1151+ # This looks like a COMMENT keyword, but we need to verify it's actually
1152+ # a COMMENT clause and not just an identifier that happens to be "comment"
1153+
10911154 # Skip COMMENT keyword
1092- i += 7
1155+ j = i + 7
10931156
10941157 # Skip whitespace and optional '='
1095- while i < len (create_statement ) and create_statement [i ].isspace ():
1096- i += 1
1097- if i < len (create_statement ) and create_statement [i ] == '=' :
1098- i += 1
1099- while i < len (create_statement ) and create_statement [i ].isspace ():
1100- i += 1
1158+ while j < len (create_statement ) and create_statement [j ].isspace ():
1159+ j += 1
1160+ if j < len (create_statement ) and create_statement [j ] == '=' :
1161+ j += 1
1162+ while j < len (create_statement ) and create_statement [j ].isspace ():
1163+ j += 1
11011164
1102- # Find the quoted string
1103- if i < len (create_statement ) and create_statement [i ] in ('"' , "'" ):
1104- quote_char = create_statement [i ]
1105- i += 1 # Skip opening quote
1165+ # Check if this is followed by a quoted string (indicating a COMMENT clause)
1166+ if j < len (create_statement ) and create_statement [j ] in ('"' , "'" ):
1167+ # This is a COMMENT clause - skip it entirely
1168+ quote_char = create_statement [j ]
1169+ j += 1 # Skip opening quote
11061170
11071171 # Find the closing quote, handling escaped quotes
1108- while i < len (create_statement ):
1109- if create_statement [i ] == quote_char :
1172+ while j < len (create_statement ):
1173+ if create_statement [j ] == quote_char :
11101174 # Check if this is an escaped quote (doubled)
1111- if i + 1 < len (create_statement ) and create_statement [i + 1 ] == quote_char :
1112- i += 2 # Skip both quotes
1175+ if j + 1 < len (create_statement ) and create_statement [j + 1 ] == quote_char :
1176+ j += 2 # Skip both quotes
11131177 else :
1114- i += 1 # Skip closing quote
1178+ j += 1 # Skip closing quote
11151179 break
11161180 else :
1117- i += 1
1118- else :
1119- result .append (create_statement [i ])
1120- i += 1
1181+ j += 1
1182+
1183+ # Skip the entire COMMENT clause
1184+ i = j
1185+ continue
1186+ else :
1187+ # This is not a COMMENT clause (no quoted string follows)
1188+ # Treat it as a regular identifier
1189+ result .append (char )
1190+ i += 1
1191+ continue
1192+
1193+ # Regular character - just copy it
1194+ result .append (char )
1195+ i += 1
11211196
11221197 return '' .join (result )
11231198
0 commit comments