Skip to content

Commit d76ef3f

Browse files
committed
simplify trailing whitespace logic
1 parent fa70b10 commit d76ef3f

File tree

4 files changed

+139
-9
lines changed

4 files changed

+139
-9
lines changed

quaddtype/numpy_quaddtype/src/casts.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -225,8 +225,7 @@ unicode_to_quad_convert(const Py_UCS4 *ucs4_str, npy_intp unicode_size_chars,
225225
}
226226

227227
// Check that we parsed the entire string (skip trailing whitespace)
228-
while (*endptr == ' ' || *endptr == '\t' || *endptr == '\n' ||
229-
*endptr == '\r' || *endptr == '\f' || *endptr == '\v') {
228+
while (ascii_isspace(*endptr)) {
230229
endptr++;
231230
}
232231

quaddtype/numpy_quaddtype/src/utilities.c

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,32 +5,31 @@
55
#include "utilities.h"
66
#include "constants.hpp"
77

8-
// Locale-independent ASCII character classification helpers
9-
static int
8+
int
109
ascii_isspace(int c)
1110
{
1211
return c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' || c == '\v';
1312
}
1413

15-
static int
14+
int
1615
ascii_isalpha(char c)
1716
{
1817
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
1918
}
2019

21-
static int
20+
int
2221
ascii_isdigit(char c)
2322
{
2423
return (c >= '0' && c <= '9');
2524
}
2625

27-
static int
26+
int
2827
ascii_isalnum(char c)
2928
{
3029
return ascii_isdigit(c) || ascii_isalpha(c);
3130
}
3231

33-
static int
32+
int
3433
ascii_tolower(int c)
3534
{
3635
if (c >= 'A' && c <= 'Z') {
@@ -40,7 +39,7 @@ ascii_tolower(int c)
4039
}
4140

4241
// inspired by NumPyOS_ascii_strncasecmp
43-
static int
42+
int
4443
ascii_strncasecmp(const char *s1, const char *s2, size_t n)
4544
{
4645
while (n > 0 && *s1 != '\0' && *s2 != '\0') {

quaddtype/numpy_quaddtype/src/utilities.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,17 @@ extern "C" {
1111
#include <stdbool.h>
1212

1313
int cstring_to_quad(const char *str, QuadBackendType backend, quad_value *out_value, char **endptr, bool require_full_parse);
14+
int ascii_isspace(int c);
15+
int ascii_isalpha(char c);
16+
int ascii_isdigit(char c);
17+
int ascii_isalnum(char c);
18+
int ascii_tolower(int c);
19+
int ascii_strncasecmp(const char *s1, const char *s2, size_t n);
1420

1521
// Locale-independent ASCII string to quad parser (inspired by NumPyOS_ascii_strtold)
1622
int NumPyOS_ascii_strtoq(const char *s, QuadBackendType backend, quad_value *out_value, char **endptr);
1723

24+
1825
// Helper function: Convert quad_value to Sleef_quad for Dragon4
1926
Sleef_quad
2027
quad_to_sleef_quad(const quad_value *in_val, QuadBackendType backend);

quaddtype/tests/test_quaddtype.py

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -773,6 +773,131 @@ def test_locale_independence(self):
773773
with pytest.raises(ValueError):
774774
arr_comma.astype(QuadPrecDType())
775775

776+
@pytest.mark.parametrize("input_str,description", [
777+
(" 1.23 ", "space - leading and trailing"),
778+
("\t1.23\t", "tab - leading and trailing"),
779+
("\n1.23\n", "newline - leading and trailing"),
780+
("\r1.23\r", "carriage return - leading and trailing"),
781+
("\v1.23\v", "vertical tab - leading and trailing"),
782+
("\f1.23\f", "form feed - leading and trailing"),
783+
(" \t\n\r\v\f1.23 \t\n\r\v\f", "all 6 whitespace chars - mixed"),
784+
("\t\t\t3.14\t\t\t", "multiple tabs"),
785+
(" inf ", "infinity with spaces"),
786+
("\t\t-inf\t\t", "negative infinity with tabs"),
787+
("\n\nnan\n\n", "nan with newlines"),
788+
("\r\r-nan\r\r", "negative nan with carriage returns"),
789+
("\v\v1e10\v\v", "scientific notation with vertical tabs"),
790+
("\f\f-1.23e-45\f\f", "negative scientific with form feeds"),
791+
])
792+
def test_all_six_whitespace_characters(self, input_str, description):
793+
"""Test all 6 ASCII whitespace characters (space, tab, newline, carriage return, vertical tab, form feed)
794+
795+
This tests the ascii_isspace() helper function (defined in utilities.c and called from casts.cpp), which matches
796+
CPython's Py_ISSPACE and NumPy's NumPyOS_ascii_isspace behavior.
797+
The 6 characters are: 0x09(\t), 0x0A(\n), 0x0B(\v), 0x0C(\f), 0x0D(\r), 0x20(space)
798+
"""
799+
arr = np.array([input_str], dtype='U50')
800+
result = arr.astype(QuadPrecDType())
801+
802+
# Should successfully parse without errors
803+
result_val = str(result[0])
804+
assert result_val, f"Failed to parse with {description}"
805+
806+
# Verify the value is correct (strip whitespace and compare)
807+
stripped = input_str.strip(' \t\n\r\v\f')
808+
expected_arr = np.array([stripped], dtype='U50')
809+
expected = expected_arr.astype(QuadPrecDType())
810+
811+
if np.isnan(float(str(expected[0]))):
812+
assert np.isnan(float(str(result[0]))), f"NaN parsing failed for {description}"
813+
elif np.isinf(float(str(expected[0]))):
814+
assert np.isinf(float(str(result[0]))), f"Inf parsing failed for {description}"
815+
assert np.sign(float(str(expected[0]))) == np.sign(float(str(result[0]))), \
816+
f"Inf sign mismatch for {description}"
817+
else:
818+
assert result[0] == expected[0], f"Value mismatch for {description}"
819+
820+
@pytest.mark.parametrize("invalid_str,description", [
821+
("1.23 abc", "trailing non-whitespace after number"),
822+
(" 1.23xyz ", "trailing garbage with surrounding whitespace"),
823+
("abc 123", "leading garbage before number"),
824+
("1.23\x01", "control char (SOH) after number"),
825+
("1.23 a", "letter after multiple spaces"),
826+
("\t1.23\tabc\t", "tabs with garbage in middle"),
827+
])
828+
def test_whitespace_with_invalid_trailing_content(self, invalid_str, description):
829+
Test that strings with non-whitespace garbage before or after the number are rejected, even when whitespace is present
830+
831+
This ensures the trailing whitespace check in casts.cpp properly validates
832+
that only whitespace follows the parsed number, not other characters.
833+
"""
834+
arr = np.array([invalid_str], dtype='U50')
835+
836+
with pytest.raises(ValueError, match="could not convert string to QuadPrecision"):
837+
arr.astype(QuadPrecDType())
838+
839+
def test_empty_string_and_whitespace_only(self):
840+
"""Test that empty strings and whitespace-only strings raise errors"""
841+
test_cases = [
842+
"", # Empty string
843+
" ", # Single space
844+
" ", # Multiple spaces
845+
"\t", # Single tab
846+
"\n", # Single newline
847+
"\r", # Single carriage return
848+
"\v", # Single vertical tab
849+
"\f", # Single form feed
850+
" \t\n\r\v\f", # All whitespace characters
851+
" \t\t\n\n ", # Mixed whitespace
852+
]
853+
854+
for test_str in test_cases:
855+
arr = np.array([test_str], dtype='U20')
856+
with pytest.raises(ValueError, match="could not convert string to QuadPrecision"):
857+
arr.astype(QuadPrecDType())
858+
859+
@pytest.mark.parametrize("boundary_str,description", [
860+
("1e4932", "near max exponent for quad precision"),
861+
("1e-4932", "near min exponent for quad precision"),
862+
("1.189731495357231765085759326628007016196477" + "e4932", "very large number"),
863+
("3.362103143112093506262677817321752602596e-4932", "very small number"),
864+
("-1.189731495357231765085759326628007016196477" + "e4932", "very large negative"),
865+
("-3.362103143112093506262677817321752602596e-4932", "very small negative"),
866+
])
867+
def test_extreme_exponent_values(self, boundary_str, description):
868+
"""Test parsing of numbers with extreme exponents near quad precision limits
869+
870+
IEEE 754 binary128 has a decimal exponent range of approximately ±4932
871+
"""
872+
arr = np.array([boundary_str], dtype='U100')
873+
result = arr.astype(QuadPrecDType())
874+
875+
# Should parse successfully (may result in inf for overflow cases)
876+
result_str = str(result[0])
877+
assert result_str, f"Failed to parse {description}"
878+
879+
@pytest.mark.parametrize("precision_str", [
880+
"3.141592653589793238462643383279502884197", # pi, 40 significant digits (more than quad precision can hold)
881+
"2.718281828459045235360287471352662497757", # e, 40 significant digits
882+
"1.414213562373095048801688724209698078569", # sqrt(2), 40 significant digits
883+
"-1.732050807568877293527446341505872366942", # -sqrt(3), 40 significant digits
884+
])
885+
def test_full_precision_parsing(self, precision_str):
886+
"""Test that strings with full quad precision (36 decimal digits) parse correctly
887+
888+
This ensures the full precision is preserved during string -> quad conversion
889+
"""
890+
arr = np.array([precision_str], dtype='U50')
891+
result = arr.astype(QuadPrecDType())
892+
893+
# Convert back to string and verify roundtrip preserves precision
894+
back_to_str = result.astype('U50')
895+
roundtrip = back_to_str.astype(QuadPrecDType())
896+
897+
# Roundtrip should preserve the value
898+
assert result[0] == roundtrip[0], \
899+
f"Precision lost in roundtrip for {precision_str}"
900+
776901

777902
def test_basic_equality():
778903
assert QuadPrecision("12") == QuadPrecision(

0 commit comments

Comments
 (0)