-
-
Notifications
You must be signed in to change notification settings - Fork 19.1k
PERF: fix performance regression from #62542 #62623
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 11 commits
be21b2e
fc10a5f
ab2fab8
7e8033d
5219386
4ff07e3
c7fc292
4c8d770
35f075a
448f944
cf0a26d
2e5a47c
ca32c01
46c9883
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1834,6 +1834,34 @@ int uint64_conflict(uint_state *self) { | |
return self->seen_uint && (self->seen_sint || self->seen_null); | ||
} | ||
|
||
/** | ||
* @brief Validates that a string contains only numeric digits. | ||
* | ||
* This function is used after an integer overflow, | ||
* where is checks the rest of the string for a non-numeric character. | ||
* | ||
* Pure integer overflows during CSV parsing are converted to PyLongObjects, | ||
* while, if any invalid character is found, it skips integer | ||
* parsing and tries other conversion methods. | ||
* | ||
* @param p_item Pointer to the string to validate for numeric format | ||
* | ||
* @return Integer 0 if the remainder of the string contains only digits, | ||
* otherwise returns the error code for [ERROR_INVALID_CHARS]. | ||
*/ | ||
static inline int check_for_invalid_char(const char *p_item) { | ||
while (*p_item != '\0' && isdigit_ascii(*p_item)) { | ||
p_item++; | ||
} | ||
|
||
// check if reached the end of string after consuming all digits | ||
if (*p_item != '\0') { | ||
return ERROR_INVALID_CHARS; | ||
} | ||
|
||
return 0; | ||
} | ||
|
||
int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max, | ||
int *error, char tsep) { | ||
const char *p = p_item; | ||
|
@@ -1879,6 +1907,10 @@ int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max, | |
d = *++p; | ||
} else { | ||
*error = ERROR_OVERFLOW; | ||
int status = check_for_invalid_char(p); | ||
|
||
if (status != 0) { | ||
*error = status; | ||
} | ||
return 0; | ||
} | ||
} | ||
|
@@ -1890,6 +1922,10 @@ int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max, | |
d = *++p; | ||
} else { | ||
*error = ERROR_OVERFLOW; | ||
int status = check_for_invalid_char(p); | ||
if (status != 0) { | ||
*error = status; | ||
} | ||
return 0; | ||
} | ||
} | ||
|
@@ -1917,6 +1953,10 @@ int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max, | |
|
||
} else { | ||
*error = ERROR_OVERFLOW; | ||
int status = check_for_invalid_char(p); | ||
if (status != 0) { | ||
*error = status; | ||
} | ||
return 0; | ||
} | ||
} | ||
|
@@ -1929,6 +1969,10 @@ int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max, | |
|
||
} else { | ||
*error = ERROR_OVERFLOW; | ||
int status = check_for_invalid_char(p); | ||
if (status != 0) { | ||
*error = status; | ||
} | ||
return 0; | ||
} | ||
} | ||
|
@@ -1997,6 +2041,10 @@ uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max, | |
|
||
} else { | ||
*error = ERROR_OVERFLOW; | ||
int status = check_for_invalid_char(p); | ||
if (status != 0) { | ||
*error = status; | ||
} | ||
return 0; | ||
} | ||
} | ||
|
@@ -2009,6 +2057,10 @@ uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max, | |
|
||
} else { | ||
*error = ERROR_OVERFLOW; | ||
int status = check_for_invalid_char(p); | ||
if (status != 0) { | ||
*error = status; | ||
} | ||
return 0; | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add the length of the string as an argument? I realize this is a static function, but it's still best to guard against buffer overruns in case of a future refactor.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This information is not available in any of the parent functions. So I would have to call
strlen
to use it. I don't see much value in it. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's about minimizing the risk during a refactor. C is not an inherently safe language, so you need to be somewhat paranoid when writing functions.
You are correct in that at face value calling
strlen
is pretty... well, dumb. But it's a sign that a refactor can happen in another PR to better keep track of the length of a string while processing it.