Skip to content

Commit 0429656

Browse files
committed
add casts between string and float[16, 32, 64]
1 parent 41d428c commit 0429656

File tree

4 files changed

+237
-27
lines changed

4 files changed

+237
-27
lines changed

stringdtype/meson.build

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,13 @@ incdir_numpy = run_command(py,
1414
check: true
1515
).stdout().strip()
1616

17+
# C compiler object, used below to look up the npymath library.
cc = meson.get_compiler('c')
18+
19+
# Directory searched for npymath: the 'lib' directory that sits next to
# NumPy's include directory.
npymath_path = incdir_numpy / '..' / 'lib'
20+
# npymath is passed to the extension module as a link dependency.
npymath_lib = cc.find_library('npymath', dirs: npymath_path)
21+
# Wrap the NumPy include directory in a dependency object so it can be passed
# through the `dependencies:` keyword alongside npymath_lib.
inc_np = include_directories(incdir_numpy)
22+
np_dep = declare_dependency(include_directories: inc_np)
23+
1724
includes = include_directories(
1825
[
1926
incdir_numpy,
@@ -47,5 +54,6 @@ py.extension_module(
4754
srcs,
4855
install: true,
4956
subdir: 'stringdtype',
50-
include_directories: includes
57+
include_directories: includes,
58+
dependencies: [np_dep, npymath_lib]
5159
)

stringdtype/stringdtype/src/casts.c

Lines changed: 208 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -519,6 +519,9 @@ static npy_longlong
519519
string_to_uint(char *in, npy_ulonglong *value)
520520
{
521521
PyObject *pylong_value = string_to_pylong(in);
522+
if (pylong_value == NULL) {
523+
return -1;
524+
}
522525
*value = PyLong_AsUnsignedLongLong(pylong_value);
523526
if (*value == (unsigned long long)-1 && PyErr_Occurred()) {
524527
Py_DECREF(pylong_value);
@@ -542,13 +545,13 @@ string_to_int(char *in, npy_longlong *value)
542545
}
543546

544547
static int
545-
pylong_to_string(PyObject *pylong_val, char *out)
548+
pyobj_to_string(PyObject *obj, char *out)
546549
{
547-
if (pylong_val == NULL) {
550+
if (obj == NULL) {
548551
return -1;
549552
}
550-
PyObject *pystr_val = PyObject_Str(pylong_val);
551-
Py_DECREF(pylong_val);
553+
PyObject *pystr_val = PyObject_Str(obj);
554+
Py_DECREF(obj);
552555
if (pystr_val == NULL) {
553556
return -1;
554557
}
@@ -573,14 +576,14 @@ static int
573576
int_to_string(long long in, char *out)
574577
{
575578
PyObject *pylong_val = PyLong_FromLongLong(in);
576-
return pylong_to_string(pylong_val, out);
579+
return pyobj_to_string(pylong_val, out);
577580
}
578581

579582
static int
580583
uint_to_string(unsigned long long in, char *out)
581584
{
582585
PyObject *pylong_val = PyLong_FromUnsignedLongLong(in);
583-
return pylong_to_string(pylong_val, out);
586+
return pyobj_to_string(pylong_val, out);
584587
}
585588

586589
#define STRING_INT_CASTS(typename, typekind, shortname, numpy_tag, \
@@ -663,6 +666,7 @@ uint_to_string(unsigned long long in, char *out)
663666
if (typekind##_to_string((longtype)*in, out) != 0) { \
664667
return -1; \
665668
} \
669+
\
666670
in += in_stride; \
667671
out += out_stride; \
668672
} \
@@ -678,7 +682,7 @@ uint_to_string(unsigned long long in, char *out)
678682
\
679683
static char *shortname##2s_name = "cast_" #typename "_to_StringDType";
680684

681-
#define INT_DTYPES_AND_CAST_SPEC(shortname, typename) \
685+
#define DTYPES_AND_CAST_SPEC(shortname, typename) \
682686
PyArray_DTypeMeta **s2##shortname##_dtypes = \
683687
get_dtypes(this, &PyArray_##typename##DType); \
684688
\
@@ -733,6 +737,175 @@ STRING_INT_CASTS(ulonglong, uint, ulonglong, NPY_ULONGLONG, llu, npy_ulonglong,
733737
unsigned long long)
734738
#endif
735739

740+
static PyObject *
741+
string_to_pyfloat(char *in)
742+
{
743+
ss *s = (ss *)in;
744+
if (ss_isnull(s)) {
745+
PyErr_SetString(
746+
PyExc_ValueError,
747+
"Arrays with missing data cannot be converted to integers");
748+
return NULL;
749+
}
750+
PyObject *val_obj = PyUnicode_FromStringAndSize(s->buf, s->len);
751+
if (val_obj == NULL) {
752+
return NULL;
753+
}
754+
PyObject *pyfloat_value = PyFloat_FromString(val_obj);
755+
Py_DECREF(val_obj);
756+
return pyfloat_value;
757+
}
758+
759+
/*
 * Generate the StringDType -> npy_<typename> strided cast loop, together
 * with its slot table (s2<shortname>_slots) and name string
 * (s2<shortname>_name).
 *
 *   typename         numpy float type suffix, e.g. float32
 *   shortname        short tag used in the generated identifiers, e.g. f32
 *   isinf_name       predicate applied to the converted value to detect
 *                    infinity
 *   double_to_float  cast type or function turning a C double into the
 *                    target type
 *
 * The generated loop returns 0 on success and -1 with a Python exception
 * set if an element cannot be parsed or if a finite double overflows to
 * infinity in the narrower target type.
 *
 * string_to_<typename>_resolve_descriptors must already be declared, since
 * the slot table takes its address.
 */
#define STRING_TO_FLOAT_CAST(typename, shortname, isinf_name,                \
                             double_to_float)                                \
    static int string_to_##typename(                                         \
            PyArrayMethod_Context *NPY_UNUSED(context), char *const data[],  \
            npy_intp const dimensions[], npy_intp const strides[],           \
            NpyAuxData *NPY_UNUSED(auxdata))                                 \
    {                                                                        \
        npy_intp N = dimensions[0];                                          \
        char *in = data[0];                                                  \
        npy_##typename *out = (npy_##typename *)data[1];                     \
                                                                             \
        npy_intp in_stride = strides[0];                                     \
        /* divide as signed: dividing by a bare sizeof would convert a */    \
        /* negative stride to an unsigned value first */                     \
        npy_intp out_stride =                                                \
                strides[1] / (npy_intp)sizeof(npy_##typename);               \
                                                                             \
        while (N--) {                                                        \
            PyObject *pyfloat_value = string_to_pyfloat(in);                 \
            if (pyfloat_value == NULL) {                                     \
                return -1;                                                   \
            }                                                                \
            double dval = PyFloat_AS_DOUBLE(pyfloat_value);                  \
            /* only the C double is needed from here on; drop the */         \
            /* temporary so no reference leaks on either exit path */        \
            Py_DECREF(pyfloat_value);                                        \
            npy_##typename fval = (double_to_float)(dval);                   \
                                                                             \
            if (NPY_UNLIKELY(isinf_name(fval) && !(npy_isinf(dval)))) {      \
                /* we need to somehow use numpy's floating point error */    \
                /* handling, which supports lots more functionality but */   \
                /* isn't exposed in the C API */                             \
                PyErr_SetString(PyExc_FloatingPointError,                    \
                                "overflow encountered in cast");             \
                return -1;                                                   \
            }                                                                \
                                                                             \
            *out = fval;                                                     \
                                                                             \
            in += in_stride;                                                 \
            out += out_stride;                                               \
        }                                                                    \
                                                                             \
        return 0;                                                            \
    }                                                                        \
                                                                             \
    static PyType_Slot s2##shortname##_slots[] = {                           \
            {NPY_METH_resolve_descriptors,                                   \
             &string_to_##typename##_resolve_descriptors},                   \
            {NPY_METH_strided_loop, &string_to_##typename},                  \
            {0, NULL}};                                                      \
                                                                             \
    static char *s2##shortname##_name = "cast_StringDType_to_" #typename;
806+
807+
/*
 * Generate string_to_<typename>_resolve_descriptors, the descriptor
 * resolver for the StringDType -> float cast.
 *
 *   typename      numpy float type suffix used in the function name
 *   npy_typename  suffix of the NPY_* type number (DOUBLE, FLOAT, HALF)
 *
 * The generated function fills loop_descrs with new references:
 *   - loop_descrs[0] is the given input descriptor;
 *   - loop_descrs[1] is the given output descriptor, or a fresh descriptor
 *     for NPY_<npy_typename> when the caller passed none.
 *
 * It returns NPY_UNSAFE_CASTING on success, or (NPY_CASTING)-1 with a
 * Python exception set if the output descriptor cannot be created.
 */
#define STRING_TO_FLOAT_RESOLVE_DESCRIPTORS(typename, npy_typename)          \
    static NPY_CASTING string_to_##typename##_resolve_descriptors(           \
            PyObject *NPY_UNUSED(self),                                      \
            PyArray_DTypeMeta *NPY_UNUSED(dtypes[2]),                        \
            PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2],   \
            npy_intp *NPY_UNUSED(view_offset))                               \
    {                                                                        \
        if (given_descrs[1] == NULL) {                                       \
            loop_descrs[1] = PyArray_DescrNewFromType(NPY_##npy_typename);   \
            /* bail out before touching loop_descrs[0] so nothing is */      \
            /* left to clean up on failure */                                \
            if (loop_descrs[1] == NULL) {                                    \
                return (NPY_CASTING)-1;                                      \
            }                                                                \
        }                                                                    \
        else {                                                               \
            Py_INCREF(given_descrs[1]);                                      \
            loop_descrs[1] = given_descrs[1];                                \
        }                                                                    \
                                                                             \
        Py_INCREF(given_descrs[0]);                                          \
        loop_descrs[0] = given_descrs[0];                                    \
                                                                             \
        return NPY_UNSAFE_CASTING;                                           \
    }
827+
828+
/*
 * Generate the npy_<typename> -> StringDType strided cast loop, together
 * with its slot table (<shortname>2s_slots) and name string
 * (<shortname>2s_name).
 *
 *   typename         numpy float type suffix, e.g. float32
 *   shortname        short tag used in the generated identifiers, e.g. f32
 *   float_to_double  cast type or function turning the source value into a
 *                    C double
 *
 * Each element is wrapped in a temporary Python float and handed to
 * pyobj_to_string, which writes the output element. The loop returns 0 on
 * success and -1 as soon as pyobj_to_string reports failure.
 */
#define FLOAT_TO_STRING_CAST(typename, shortname, float_to_double)           \
    static int typename##_to_string(                                         \
            PyArrayMethod_Context *NPY_UNUSED(context), char *const data[],  \
            npy_intp const dimensions[], npy_intp const strides[],           \
            NpyAuxData *NPY_UNUSED(auxdata))                                 \
    {                                                                        \
        npy_intp N = dimensions[0];                                          \
        npy_##typename *in = (npy_##typename *)data[0];                      \
        char *out = data[1];                                                 \
                                                                             \
        /* divide as signed: dividing by a bare sizeof would convert a */    \
        /* negative stride to an unsigned value first */                     \
        npy_intp in_stride =                                                 \
                strides[0] / (npy_intp)sizeof(npy_##typename);               \
        npy_intp out_stride = strides[1];                                    \
                                                                             \
        while (N--) {                                                        \
            /* a NULL result is passed straight through; */                  \
            /* pyobj_to_string checks for it */                              \
            PyObject *pyfloat_val =                                          \
                    PyFloat_FromDouble((float_to_double)(*in));              \
            if (pyobj_to_string(pyfloat_val, out) == -1) {                   \
                return -1;                                                   \
            }                                                                \
                                                                             \
            in += in_stride;                                                 \
            out += out_stride;                                               \
        }                                                                    \
                                                                             \
        return 0;                                                            \
    }                                                                        \
                                                                             \
    static PyType_Slot shortname##2s_slots[] = {                             \
            {NPY_METH_resolve_descriptors,                                   \
             &any_to_string_UNSAFE_resolve_descriptors},                     \
            {NPY_METH_strided_loop, &typename##_to_string},                  \
            {0, NULL}};                                                      \
                                                                             \
    static char *shortname##2s_name = "cast_" #typename "_to_StringDType";
862+
863+
STRING_TO_FLOAT_RESOLVE_DESCRIPTORS(float64, DOUBLE)
864+
865+
static int
866+
string_to_float64(PyArrayMethod_Context *NPY_UNUSED(context),
867+
char *const data[], npy_intp const dimensions[],
868+
npy_intp const strides[], NpyAuxData *NPY_UNUSED(auxdata))
869+
{
870+
npy_intp N = dimensions[0];
871+
char *in = data[0];
872+
npy_float64 *out = (npy_float64 *)data[1];
873+
874+
npy_intp in_stride = strides[0];
875+
npy_intp out_stride = strides[1] / sizeof(npy_float64);
876+
877+
while (N--) {
878+
PyObject *pyfloat_value = string_to_pyfloat(in);
879+
if (pyfloat_value == NULL) {
880+
return -1;
881+
}
882+
*out = (npy_float64)PyFloat_AS_DOUBLE(pyfloat_value);
883+
Py_DECREF(pyfloat_value);
884+
885+
in += in_stride;
886+
out += out_stride;
887+
}
888+
889+
return 0;
890+
}
891+
892+
static PyType_Slot s2f64_slots[] = {
893+
{NPY_METH_resolve_descriptors, &string_to_float64_resolve_descriptors},
894+
{NPY_METH_strided_loop, &string_to_float64},
895+
{0, NULL}};
896+
897+
static char *s2f64_name = "cast_StringDType_to_float64";
898+
899+
/* float64 -> string: the value is cast to double before conversion. */
FLOAT_TO_STRING_CAST(float64, f64, double)
900+
901+
/* float32: resolver, string -> float32 (overflow detected with npy_isinf,
 * narrowing done with a plain npy_float32 cast), and float32 -> string. */
STRING_TO_FLOAT_RESOLVE_DESCRIPTORS(float32, FLOAT)
902+
STRING_TO_FLOAT_CAST(float32, f32, npy_isinf, npy_float32)
903+
FLOAT_TO_STRING_CAST(float32, f32, double)
904+
905+
/* float16: resolver, string -> float16 (overflow detected with
 * npy_half_isinf, narrowing done with npy_double_to_half), and
 * float16 -> string (widened with npy_half_to_double). */
STRING_TO_FLOAT_RESOLVE_DESCRIPTORS(float16, HALF)
906+
STRING_TO_FLOAT_CAST(float16, f16, npy_half_isinf, npy_double_to_half)
907+
FLOAT_TO_STRING_CAST(float16, f16, npy_half_to_double)
908+
736909
PyArrayMethod_Spec *
737910
get_cast_spec(const char *name, NPY_CASTING casting,
738911
NPY_ARRAYMETHOD_FLAGS flags, PyArray_DTypeMeta **dtypes,
@@ -785,7 +958,7 @@ get_casts(PyArray_DTypeMeta *this, PyArray_DTypeMeta *other)
785958

786959
int is_pandas = (this == (PyArray_DTypeMeta *)&PandasStringDType);
787960

788-
int num_casts = 21;
961+
int num_casts = 27;
789962

790963
#if NPY_SIZEOF_BYTE == NPY_SIZEOF_SHORT
791964
num_casts += 4;
@@ -840,34 +1013,37 @@ get_casts(PyArray_DTypeMeta *this, PyArray_DTypeMeta *other)
8401013
b2s_name, NPY_SAFE_CASTING, NPY_METH_NO_FLOATINGPOINT_ERRORS,
8411014
b2s_dtypes, b2s_slots);
8421015

843-
INT_DTYPES_AND_CAST_SPEC(i8, Int8)
844-
INT_DTYPES_AND_CAST_SPEC(i16, Int16)
845-
INT_DTYPES_AND_CAST_SPEC(i32, Int32)
846-
INT_DTYPES_AND_CAST_SPEC(i64, Int64)
847-
INT_DTYPES_AND_CAST_SPEC(u8, UInt8)
848-
INT_DTYPES_AND_CAST_SPEC(u16, UInt16)
849-
INT_DTYPES_AND_CAST_SPEC(u32, UInt32)
850-
INT_DTYPES_AND_CAST_SPEC(u64, UInt64)
1016+
DTYPES_AND_CAST_SPEC(i8, Int8)
1017+
DTYPES_AND_CAST_SPEC(i16, Int16)
1018+
DTYPES_AND_CAST_SPEC(i32, Int32)
1019+
DTYPES_AND_CAST_SPEC(i64, Int64)
1020+
DTYPES_AND_CAST_SPEC(u8, UInt8)
1021+
DTYPES_AND_CAST_SPEC(u16, UInt16)
1022+
DTYPES_AND_CAST_SPEC(u32, UInt32)
1023+
DTYPES_AND_CAST_SPEC(u64, UInt64)
8511024
#if NPY_SIZEOF_BYTE == NPY_SIZEOF_SHORT
852-
INT_DTYPES_AND_CAST_SPEC(byte, Byte)
853-
INT_DTYPES_AND_CAST_SPEC(ubyte, UByte)
1025+
DTYPES_AND_CAST_SPEC(byte, Byte)
1026+
DTYPES_AND_CAST_SPEC(ubyte, UByte)
8541027
#endif
8551028
#if NPY_SIZEOF_SHORT == NPY_SIZEOF_INT
856-
INT_DTYPES_AND_CAST_SPEC(short, Short)
857-
INT_DTYPES_AND_CAST_SPEC(ushort, UShort)
1029+
DTYPES_AND_CAST_SPEC(short, Short)
1030+
DTYPES_AND_CAST_SPEC(ushort, UShort)
8581031
#endif
8591032
#if NPY_SIZEOF_INT == NPY_SIZEOF_LONG
860-
INT_DTYPES_AND_CAST_SPEC(int, Int)
861-
INT_DTYPES_AND_CAST_SPEC(uint, UInt)
1033+
DTYPES_AND_CAST_SPEC(int, Int)
1034+
DTYPES_AND_CAST_SPEC(uint, UInt)
8621035
#endif
8631036
#if NPY_SIZEOF_LONGLONG == NPY_SIZEOF_LONG
864-
INT_DTYPES_AND_CAST_SPEC(longlong, LongLong)
865-
INT_DTYPES_AND_CAST_SPEC(ulonglong, ULongLong)
1037+
DTYPES_AND_CAST_SPEC(longlong, LongLong)
1038+
DTYPES_AND_CAST_SPEC(ulonglong, ULongLong)
8661039
#endif
8671040

868-
PyArrayMethod_Spec **casts = NULL;
1041+
DTYPES_AND_CAST_SPEC(f64, Double)
1042+
DTYPES_AND_CAST_SPEC(f32, Float)
1043+
DTYPES_AND_CAST_SPEC(f16, Half)
8691044

870-
casts = malloc((num_casts + 1) * sizeof(PyArrayMethod_Spec *));
1045+
PyArrayMethod_Spec **casts =
1046+
malloc((num_casts + 1) * sizeof(PyArrayMethod_Spec *));
8711047

8721048
int cast_i = 0;
8731049

@@ -916,6 +1092,12 @@ get_casts(PyArray_DTypeMeta *this, PyArray_DTypeMeta *other)
9161092
casts[cast_i++] = StringToULongLongCastSpec;
9171093
casts[cast_i++] = ULongLongToStringCastSpec;
9181094
#endif
1095+
casts[cast_i++] = StringToDoubleCastSpec;
1096+
casts[cast_i++] = DoubleToStringCastSpec;
1097+
casts[cast_i++] = StringToFloatCastSpec;
1098+
casts[cast_i++] = FloatToStringCastSpec;
1099+
casts[cast_i++] = StringToHalfCastSpec;
1100+
casts[cast_i++] = HalfToStringCastSpec;
9191101
if (is_pandas) {
9201102
casts[cast_i++] = ThisToOtherCastSpec;
9211103
casts[cast_i++] = OtherToThisCastSpec;

stringdtype/stringdtype/src/casts.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#define NO_IMPORT_ARRAY
99
#include "numpy/arrayobject.h"
1010
#include "numpy/experimental_dtype_api.h"
11+
#include "numpy/halffloat.h"
1112
#include "numpy/ndarraytypes.h"
1213

1314
PyArrayMethod_Spec **

stringdtype/tests/test_stringdtype.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,25 @@ def test_unsized_integer_casts(dtype, typename, signed):
408408
np.testing.assert_array_equal(ainp, ainp.astype(dtype).astype(idtype))
409409

410410

411+
@pytest.mark.parametrize("typename", ["float64", "float32", "float16"])
def test_float_casts(dtype, typename):
    """Round-trip floats through the string dtype and back to `typename`."""

    def via_string(values):
        # float array -> string dtype -> float array of the same width
        return np.array(values, dtype=typename).astype(dtype).astype(typename)

    # Ordinary finite values must survive the round trip unchanged.
    ordinary = np.array([1.1, 2.8, -3.2, 2.7e4], dtype=typename)
    np.testing.assert_array_equal(
        ordinary, ordinary.astype(dtype).astype(typename)
    )

    finfo = np.finfo(typename)
    tiny = finfo.smallest_subnormal

    # Values at and below the subnormal limit: +/-1e-324 is expected to come
    # back as zero, the smallest subnormal as itself.
    expected_small = [0, tiny, -0, -tiny]
    np.testing.assert_array_equal(
        expected_small, via_string([1e-324, tiny, -1e-324, -tiny])
    )

    # Values at and beyond the finite range: +/-2e308 is expected to come
    # back as +/-inf, the finite extremes as themselves.
    expected_large = [np.inf, finfo.max, -np.inf, finfo.min]
    np.testing.assert_array_equal(
        expected_large, via_string([2e308, finfo.max, -2e308, finfo.min])
    )
428+
429+
411430
def test_take(dtype, string_list):
412431
sarr = np.array(string_list, dtype=dtype)
413432
out = np.empty(len(string_list), dtype=dtype)

0 commit comments

Comments
 (0)