@@ -118,38 +118,88 @@ set_fstring_expr(struct tok_state* tok, struct token *token, char c) {
118118 }
119119 PyObject * res = NULL ;
120120
121- // Check if there is a # character in the expression
121+ // Look for a # character outside of string literals
122122 int hash_detected = 0 ;
123+ int in_string = 0 ;
124+ char quote_char = 0 ;
125+
123126 for (Py_ssize_t i = 0 ; i < tok_mode -> last_expr_size - tok_mode -> last_expr_end ; i ++ ) {
124- if (tok_mode -> last_expr_buffer [i ] == '#' ) {
127+ char ch = tok_mode -> last_expr_buffer [i ];
128+
129+ // Skip escaped characters
130+ if (ch == '\\' ) {
131+ i ++ ;
132+ continue ;
133+ }
134+
135+ // Handle quotes
136+ if (ch == '"' || ch == '\'' ) {
137+ // The following if/else block works because there is an odd number
138+ // of quotes in STRING tokens and the lexer only ever reaches this
139+ // function with valid STRING tokens.
140+ // For example: """hello"""
141+ // First quote: in_string = 1
142+ // Second quote: in_string = 0
143+ // Third quote: in_string = 1
144+ if (!in_string ) {
145+ in_string = 1 ;
146+ quote_char = ch ;
147+ }
148+ else if (ch == quote_char ) {
149+ in_string = 0 ;
150+ }
151+ continue ;
152+ }
153+
154+ // Check for # outside strings
155+ if (ch == '#' && !in_string ) {
125156 hash_detected = 1 ;
126157 break ;
127158 }
128159 }
129-
160+ // If we found a # character in the expression, we need to handle comments
130161 if (hash_detected ) {
131- Py_ssize_t input_length = tok_mode -> last_expr_size - tok_mode -> last_expr_end ;
132- char * result = (char * )PyMem_Malloc ((input_length + 1 ) * sizeof (char ));
162+ // Allocate buffer for processed result
163+ char * result = (char * )PyMem_Malloc ((tok_mode -> last_expr_size - tok_mode -> last_expr_end + 1 ) * sizeof (char ));
133164 if (!result ) {
134165 return -1 ;
135166 }
136167
137- Py_ssize_t i = 0 ;
138- Py_ssize_t j = 0 ;
168+ Py_ssize_t i = 0 ; // Input position
169+ Py_ssize_t j = 0 ; // Output position
170+ in_string = 0 ; // Whether we're in a string
171+ quote_char = 0 ; // Current string quote char
139172
140- for (i = 0 , j = 0 ; i < input_length ; i ++ ) {
141- if (tok_mode -> last_expr_buffer [i ] == '#' ) {
142- // Skip characters until newline or end of string
143- while (i < input_length && tok_mode -> last_expr_buffer [i ] != '\0' ) {
144- if (tok_mode -> last_expr_buffer [i ] == '\n' ) {
145- result [j ++ ] = tok_mode -> last_expr_buffer [i ];
146- break ;
147- }
173+ // Process each character
174+ while (i < tok_mode -> last_expr_size - tok_mode -> last_expr_end ) {
175+ char ch = tok_mode -> last_expr_buffer [i ];
176+
177+ // Handle string quotes
178+ if (ch == '"' || ch == '\'' ) {
179+ // See comment above to understand this part
180+ if (!in_string ) {
181+ in_string = 1 ;
182+ quote_char = ch ;
183+ } else if (ch == quote_char ) {
184+ in_string = 0 ;
185+ }
186+ result [j ++ ] = ch ;
187+ }
188+ // Skip comments
189+ else if (ch == '#' && !in_string ) {
190+ while (i < tok_mode -> last_expr_size - tok_mode -> last_expr_end &&
191+ tok_mode -> last_expr_buffer [i ] != '\n' ) {
148192 i ++ ;
149193 }
150- } else {
151- result [j ++ ] = tok_mode -> last_expr_buffer [i ];
194+ if (i < tok_mode -> last_expr_size - tok_mode -> last_expr_end ) {
195+ result [j ++ ] = '\n' ;
196+ }
197+ }
198+ // Copy other chars
199+ else {
200+ result [j ++ ] = ch ;
152201 }
202+ i ++ ;
153203 }
154204
155205 result [j ] = '\0' ; // Null-terminate the result string
@@ -161,11 +211,9 @@ set_fstring_expr(struct tok_state* tok, struct token *token, char c) {
161211 tok_mode -> last_expr_size - tok_mode -> last_expr_end ,
162212 NULL
163213 );
164-
165214 }
166215
167-
168- if (!res ) {
216+ if (!res ) {
169217 return -1 ;
170218 }
171219 token -> metadata = res ;
0 commit comments