@@ -121,38 +121,88 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
121121 }
122122 PyObject * res = NULL ;
123123
124- // Check if there is a # character in the expression
124+ // Look for a # character outside of string literals
125125 int hash_detected = 0 ;
126+ int in_string = 0 ;
127+ char quote_char = 0 ;
128+
126129 for (Py_ssize_t i = 0 ; i < tok_mode -> last_expr_size - tok_mode -> last_expr_end ; i ++ ) {
127- if (tok_mode -> last_expr_buffer [i ] == '#' ) {
130+ char ch = tok_mode -> last_expr_buffer [i ];
131+
132+ // Skip escaped characters
133+ if (ch == '\\' ) {
134+ i ++ ;
135+ continue ;
136+ }
137+
138+ // Handle quotes
139+ if (ch == '"' || ch == '\'' ) {
140+ // The following if/else block works because there is an odd number
141+ // of quotes in STRING tokens and the lexer only ever reaches this
142+ // function with valid STRING tokens.
143+ // For example: """hello"""
144+ // First quote: in_string = 1
145+ // Second quote: in_string = 0
146+ // Third quote: in_string = 1
147+ if (!in_string ) {
148+ in_string = 1 ;
149+ quote_char = ch ;
150+ }
151+ else if (ch == quote_char ) {
152+ in_string = 0 ;
153+ }
154+ continue ;
155+ }
156+
157+ // Check for # outside strings
158+ if (ch == '#' && !in_string ) {
128159 hash_detected = 1 ;
129160 break ;
130161 }
131162 }
132-
163+ // If we found a # character in the expression, we need to handle comments
133164 if (hash_detected ) {
134- Py_ssize_t input_length = tok_mode -> last_expr_size - tok_mode -> last_expr_end ;
135- char * result = (char * )PyMem_Malloc ((input_length + 1 ) * sizeof (char ));
165+ // Allocate buffer for processed result
166+ char * result = (char * )PyMem_Malloc ((tok_mode -> last_expr_size - tok_mode -> last_expr_end + 1 ) * sizeof (char ));
136167 if (!result ) {
137168 return -1 ;
138169 }
139170
140- Py_ssize_t i = 0 ;
141- Py_ssize_t j = 0 ;
171+ Py_ssize_t i = 0 ; // Input position
172+ Py_ssize_t j = 0 ; // Output position
173+ in_string = 0 ; // Whether we're in a string
174+ quote_char = 0 ; // Current string quote char
142175
143- for (i = 0 , j = 0 ; i < input_length ; i ++ ) {
144- if (tok_mode -> last_expr_buffer [i ] == '#' ) {
145- // Skip characters until newline or end of string
146- while (i < input_length && tok_mode -> last_expr_buffer [i ] != '\0' ) {
147- if (tok_mode -> last_expr_buffer [i ] == '\n' ) {
148- result [j ++ ] = tok_mode -> last_expr_buffer [i ];
149- break ;
150- }
176+ // Process each character
177+ while (i < tok_mode -> last_expr_size - tok_mode -> last_expr_end ) {
178+ char ch = tok_mode -> last_expr_buffer [i ];
179+
180+ // Handle string quotes
181+ if (ch == '"' || ch == '\'' ) {
182+ // See comment above to understand this part
183+ if (!in_string ) {
184+ in_string = 1 ;
185+ quote_char = ch ;
186+ } else if (ch == quote_char ) {
187+ in_string = 0 ;
188+ }
189+ result [j ++ ] = ch ;
190+ }
191+ // Skip comments
192+ else if (ch == '#' && !in_string ) {
193+ while (i < tok_mode -> last_expr_size - tok_mode -> last_expr_end &&
194+ tok_mode -> last_expr_buffer [i ] != '\n' ) {
151195 i ++ ;
152196 }
153- } else {
154- result [j ++ ] = tok_mode -> last_expr_buffer [i ];
197+ if (i < tok_mode -> last_expr_size - tok_mode -> last_expr_end ) {
198+ result [j ++ ] = '\n' ;
199+ }
200+ }
201+ // Copy other chars
202+ else {
203+ result [j ++ ] = ch ;
155204 }
205+ i ++ ;
156206 }
157207
158208 result [j ] = '\0' ; // Null-terminate the result string
@@ -164,11 +214,9 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
164214 tok_mode -> last_expr_size - tok_mode -> last_expr_end ,
165215 NULL
166216 );
167-
168217 }
169218
170-
171- if (!res ) {
219+ if (!res ) {
172220 return -1 ;
173221 }
174222 token -> metadata = res ;
0 commit comments