Skip to content

Commit d76ff5f

Browse files
committed
fix: change solution to safeguard against end_ptr
1 parent d026b01 commit d76ff5f

File tree

1 file changed

+26
-27
lines changed

1 file changed

+26
-27
lines changed

pandas/_libs/src/parser/tokenizer.c

Lines changed: 26 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -1881,42 +1881,41 @@ static inline bool has_only_spaces(const char *str) {
18811881
static int copy_string_without_char(char output[PROCESSED_WORD_CAPACITY],
18821882
const char *str, size_t str_len,
18831883
char char_to_remove) {
1884-
// last character is reserved for null terminator.
1885-
size_t max_str_size = PROCESSED_WORD_CAPACITY - 1;
1886-
if (str_len > max_str_size) {
1887-
// str_len is too big.
1888-
// Check if it's possible to write after removing all `char_to_remove`.
1889-
size_t count_char_to_remove = 0;
1890-
for (const char *src = str; *src != '\0'; src++) {
1891-
if (*src == char_to_remove) {
1892-
count_char_to_remove++;
1893-
}
1894-
}
1884+
const char *left = str;
1885+
const char *right;
1886+
const char *end_ptr = str + str_len;
1887+
size_t bytes_read = 0;
18951888

1896-
if (str_len - count_char_to_remove > max_str_size) {
1889+
while ((right = memchr(left, char_to_remove, end_ptr - left)) != NULL) {
1890+
size_t nbytes = right - left;
1891+
1892+
// check if we have enough space, including the null terminator.
1893+
if (nbytes + bytes_read >= PROCESSED_WORD_CAPACITY) {
18971894
return ERROR_WORD2BIG;
18981895
}
1899-
}
1900-
1901-
char *dst = output;
1902-
const char *left = str;
1903-
1904-
// sliding window
1905-
for (const char *right = str; *left != '\0'; right++) {
1906-
if (*right == '\0' || *right == char_to_remove) {
1907-
size_t len = right - left;
1896+
// copy block
1897+
memcpy(&output[bytes_read], left, nbytes);
1898+
bytes_read += nbytes;
1899+
left = right + 1;
19081900

1909-
// copy block
1910-
memcpy(dst, left, len);
1901+
// Exit after processing the entire string
1902+
if (left >= end_ptr) {
1903+
break;
1904+
}
1905+
}
19111906

1912-
// go to next available location to write
1913-
dst += len;
1914-
left = *right == '\0' ? right : right + 1;
1907+
// copy final chunk that doesn't contain char_to_remove
1908+
if (end_ptr > left) {
1909+
size_t nbytes = nbytes = end_ptr - left;
1910+
if (nbytes + bytes_read >= PROCESSED_WORD_CAPACITY) {
1911+
return ERROR_WORD2BIG;
19151912
}
1913+
memcpy(&output[bytes_read], left, nbytes);
1914+
bytes_read += nbytes;
19161915
}
19171916

19181917
// null terminate
1919-
*dst = '\0';
1918+
output[bytes_read] = '\0';
19201919
return 0;
19211920
}
19221921

0 commit comments

Comments
 (0)