Skip to content

Commit 09918a3

Browse files
committed
mid-way test
1 parent e201b90 commit 09918a3

File tree

4 files changed

+195
-105
lines changed

4 files changed

+195
-105
lines changed

quaddtype/numpy_quaddtype/src/quadblas_interface.cpp

Lines changed: 90 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -786,95 +786,95 @@ py_quadblas_get_version(PyObject *self, PyObject *args)
786786
return PyUnicode_FromString(QuadBLAS::VERSION);
787787
}
788788

789-
void matmul_op(Sleef_quad * inp1, Sleef_quad *inp2, Sleef_quad *out)
790-
{
791-
Sleef_quad *data_a, *data_b;
792-
QuadBackendType backend_a, backend_b;
793-
QuadBLAS::Layout layout_a, layout_b;
794-
795-
if (!extract_quad_array_info(a, &data_a, &backend_a, &layout_a) ||
796-
!extract_quad_array_info(b, &data_b, &backend_b, &layout_b)) {
797-
return nullptr;
798-
}
799-
800-
Sleef_quad *temp_a = nullptr, *temp_b = nullptr;
801-
Sleef_quad *sleef_a = ensure_sleef_backend(a, backend_a, &temp_a);
802-
Sleef_quad *sleef_b = ensure_sleef_backend(b, backend_b, &temp_b);
803-
804-
if (!sleef_a || !sleef_b) {
805-
QuadBLAS::aligned_free(temp_a);
806-
QuadBLAS::aligned_free(temp_b);
807-
return nullptr;
808-
}
809-
810-
QuadBackendType result_backend = BACKEND_SLEEF;
811-
if (backend_a == BACKEND_LONGDOUBLE && backend_b == BACKEND_LONGDOUBLE) {
812-
result_backend = BACKEND_LONGDOUBLE;
813-
}
814-
815-
npy_intp result_dims[2] = {m, n};
816-
QuadPrecDTypeObject *result_dtype = new_quaddtype_instance(result_backend);
817-
if (!result_dtype) {
818-
QuadBLAS::aligned_free(temp_a);
819-
QuadBLAS::aligned_free(temp_b);
820-
return nullptr;
821-
}
822-
823-
PyArrayObject *result =
824-
(PyArrayObject *)PyArray_Empty(2, result_dims, (PyArray_Descr *)result_dtype, 0);
825-
if (!result) {
826-
QuadBLAS::aligned_free(temp_a);
827-
QuadBLAS::aligned_free(temp_b);
828-
Py_DECREF(result_dtype);
829-
return nullptr;
830-
}
831-
832-
Sleef_quad *result_data = (Sleef_quad *)PyArray_DATA(result);
833-
for (npy_intp i = 0; i < m * n; i++) {
834-
result_data[i] = Sleef_cast_from_doubleq1(0.0);
835-
}
836-
837-
npy_intp lda, ldb, ldc;
838-
839-
if (layout_a == QuadBLAS::Layout::RowMajor) {
840-
lda = k;
841-
}
842-
else {
843-
lda = m;
844-
}
845-
846-
if (layout_b == QuadBLAS::Layout::RowMajor) {
847-
ldb = n;
848-
}
849-
else {
850-
ldb = k;
851-
}
852-
853-
QuadBLAS::Layout result_layout = layout_a;
854-
if (result_layout == QuadBLAS::Layout::RowMajor) {
855-
ldc = n;
856-
}
857-
else {
858-
ldc = m;
859-
}
860-
861-
Sleef_quad alpha = Sleef_cast_from_doubleq1(1.0);
862-
Sleef_quad beta = Sleef_cast_from_doubleq1(0.0);
863-
864-
QuadBLAS::gemm(result_layout, m, n, k, alpha, sleef_a, lda, sleef_b, ldb, beta, result_data,
865-
ldc);
866-
867-
if (result_backend == BACKEND_LONGDOUBLE) {
868-
long double *ld_result = (long double *)PyArray_DATA(result);
869-
for (npy_intp i = 0; i < m * n; i++) {
870-
ld_result[i] = (long double)Sleef_cast_to_doubleq1(result_data[i]);
871-
}
872-
}
873-
874-
QuadBLAS::aligned_free(temp_a);
875-
QuadBLAS::aligned_free(temp_b);
876-
877-
return (PyObject *)result;
878-
}
789+
// void matmul_op(Sleef_quad * inp1, Sleef_quad *inp2, Sleef_quad *out)
790+
// {
791+
// Sleef_quad *data_a, *data_b;
792+
// QuadBackendType backend_a, backend_b;
793+
// QuadBLAS::Layout layout_a, layout_b;
794+
795+
// if (!extract_quad_array_info(a, &data_a, &backend_a, &layout_a) ||
796+
// !extract_quad_array_info(b, &data_b, &backend_b, &layout_b)) {
797+
// return nullptr;
798+
// }
799+
800+
// Sleef_quad *temp_a = nullptr, *temp_b = nullptr;
801+
// Sleef_quad *sleef_a = ensure_sleef_backend(a, backend_a, &temp_a);
802+
// Sleef_quad *sleef_b = ensure_sleef_backend(b, backend_b, &temp_b);
803+
804+
// if (!sleef_a || !sleef_b) {
805+
// QuadBLAS::aligned_free(temp_a);
806+
// QuadBLAS::aligned_free(temp_b);
807+
// return nullptr;
808+
// }
809+
810+
// QuadBackendType result_backend = BACKEND_SLEEF;
811+
// if (backend_a == BACKEND_LONGDOUBLE && backend_b == BACKEND_LONGDOUBLE) {
812+
// result_backend = BACKEND_LONGDOUBLE;
813+
// }
814+
815+
// npy_intp result_dims[2] = {m, n};
816+
// QuadPrecDTypeObject *result_dtype = new_quaddtype_instance(result_backend);
817+
// if (!result_dtype) {
818+
// QuadBLAS::aligned_free(temp_a);
819+
// QuadBLAS::aligned_free(temp_b);
820+
// return nullptr;
821+
// }
822+
823+
// PyArrayObject *result =
824+
// (PyArrayObject *)PyArray_Empty(2, result_dims, (PyArray_Descr *)result_dtype, 0);
825+
// if (!result) {
826+
// QuadBLAS::aligned_free(temp_a);
827+
// QuadBLAS::aligned_free(temp_b);
828+
// Py_DECREF(result_dtype);
829+
// return nullptr;
830+
// }
831+
832+
// Sleef_quad *result_data = (Sleef_quad *)PyArray_DATA(result);
833+
// for (npy_intp i = 0; i < m * n; i++) {
834+
// result_data[i] = Sleef_cast_from_doubleq1(0.0);
835+
// }
836+
837+
// npy_intp lda, ldb, ldc;
838+
839+
// if (layout_a == QuadBLAS::Layout::RowMajor) {
840+
// lda = k;
841+
// }
842+
// else {
843+
// lda = m;
844+
// }
845+
846+
// if (layout_b == QuadBLAS::Layout::RowMajor) {
847+
// ldb = n;
848+
// }
849+
// else {
850+
// ldb = k;
851+
// }
852+
853+
// QuadBLAS::Layout result_layout = layout_a;
854+
// if (result_layout == QuadBLAS::Layout::RowMajor) {
855+
// ldc = n;
856+
// }
857+
// else {
858+
// ldc = m;
859+
// }
860+
861+
// Sleef_quad alpha = Sleef_cast_from_doubleq1(1.0);
862+
// Sleef_quad beta = Sleef_cast_from_doubleq1(0.0);
863+
864+
// QuadBLAS::gemm(result_layout, m, n, k, alpha, sleef_a, lda, sleef_b, ldb, beta, result_data,
865+
// ldc);
866+
867+
// if (result_backend == BACKEND_LONGDOUBLE) {
868+
// long double *ld_result = (long double *)PyArray_DATA(result);
869+
// for (npy_intp i = 0; i < m * n; i++) {
870+
// ld_result[i] = (long double)Sleef_cast_to_doubleq1(result_data[i]);
871+
// }
872+
// }
873+
874+
// QuadBLAS::aligned_free(temp_a);
875+
// QuadBLAS::aligned_free(temp_b);
876+
877+
// return (PyObject *)result;
878+
// }
879879

880880
#endif // DISABLE_QUADBLAS

quaddtype/numpy_quaddtype/src/umath/matmul.cpp

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
#define NO_IMPORT_ARRAY
66
#define NO_IMPORT_UFUNC
77

8-
98
#include <Python.h>
109
#include <cstdio>
1110

@@ -25,20 +24,19 @@
2524

2625
static NPY_CASTING
2726
quad_matmul_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[],
28-
PyArray_Descr *const given_descrs[],
29-
PyArray_Descr *loop_descrs[], npy_intp *NPY_UNUSED(view_offset))
27+
PyArray_Descr *const given_descrs[], PyArray_Descr *loop_descrs[],
28+
npy_intp *NPY_UNUSED(view_offset))
3029
{
31-
32-
NPY_CASTING casting = NPY_NO_CASTING;
33-
std::cout << "exiting the descriptor";
34-
return casting;
30+
NPY_CASTING casting = NPY_NO_CASTING;
31+
std::cout << "exiting the descriptor";
32+
return casting;
3533
}
3634

3735
template <binary_op_quad_def sleef_op, binary_op_longdouble_def longdouble_op>
3836
int
3937
quad_generic_matmul_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[],
40-
npy_intp const dimensions[], npy_intp const strides[],
41-
NpyAuxData *auxdata)
38+
npy_intp const dimensions[], npy_intp const strides[],
39+
NpyAuxData *auxdata)
4240
{
4341
npy_intp N = dimensions[0];
4442
char *in1_ptr = data[0], *in2_ptr = data[1];
@@ -73,8 +71,8 @@ quad_generic_matmul_strided_loop_unaligned(PyArrayMethod_Context *context, char
7371
template <binary_op_quad_def sleef_op, binary_op_longdouble_def longdouble_op>
7472
int
7573
quad_generic_matmul_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[],
76-
npy_intp const dimensions[], npy_intp const strides[],
77-
NpyAuxData *auxdata)
74+
npy_intp const dimensions[], npy_intp const strides[],
75+
NpyAuxData *auxdata)
7876
{
7977
npy_intp N = dimensions[0];
8078
char *in1_ptr = data[0], *in2_ptr = data[1];
@@ -101,6 +99,7 @@ quad_generic_matmul_strided_loop_aligned(PyArrayMethod_Context *context, char *c
10199
return 0;
102100
}
103101

102+
template <binary_op_quad_def sleef_op, binary_op_longdouble_def longdouble_op>
104103
int
105104
create_matmul_ufunc(PyObject *numpy, const char *ufunc_name)
106105
{
@@ -136,13 +135,11 @@ create_matmul_ufunc(PyObject *numpy, const char *ufunc_name)
136135
return 0;
137136
}
138137

139-
140138
int
141139
init_matmul_ops(PyObject *numpy)
142140
{
143-
if (create_matmul_ufunc<quad_add>(numpy, "matmul") < 0) {
141+
if (create_matmul_ufunc<quad_add, ld_add>(numpy, "matmul") < 0) {
144142
return -1;
145143
}
146144
return 0;
147145
}
148-

quaddtype/release_tracker.md

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
# Plan for `numpy-quaddtype` v1.5
2+
3+
| ufunc name | Added |
4+
| ------------- | ----- |
5+
| add | ✅ |
6+
| subtract | ✅ |
7+
| multiply | ✅ |
8+
| matmul | #116 |
9+
| divide | ✅ |
10+
| logaddexp | |
11+
| logaddexp2 | |
12+
| true_divide | |
13+
| floor_divide | |
14+
| negative | ✅ |
15+
| positive | ✅ |
16+
| power | ✅ |
17+
| float_power | |
18+
| remainder | |
19+
| mod | ✅ |
20+
| fmod | |
21+
| divmod | |
22+
| absolute | ✅ |
23+
| fabs | |
24+
| rint | ✅ |
25+
| sign | |
26+
| heaviside | |
27+
| conj | |
28+
| conjugate | |
29+
| exp | ✅ |
30+
| exp2 | ✅ |
31+
| log | ✅ |
32+
| log2 | ✅ |
33+
| log10 | ✅ |
34+
| expm1 | |
35+
| log1p | ✅ |
36+
| sqrt | ✅ |
37+
| square | ✅ |
38+
| cbrt | |
39+
| reciprocal | |
40+
| gcd | |
41+
| lcm | |
42+
| sin | ✅ |
43+
| cos | ✅ |
44+
| tan | ✅ |
45+
| arcsin | ✅ |
46+
| arccos | ✅ |
47+
| arctan | ✅ |
48+
| arctan2 | ✅ |
49+
| hypot | |
50+
| sinh | |
51+
| cosh | |
52+
| tanh | |
53+
| arcsinh | |
54+
| arccosh | |
55+
| arctanh | |
56+
| degrees | |
57+
| radians | |
58+
| deg2rad | |
59+
| rad2deg | |
60+
| bitwise_and | |
61+
| bitwise_or | |
62+
| bitwise_xor | |
63+
| invert | |
64+
| left_shift | |
65+
| right_shift | |
66+
| greater | ✅ |
67+
| greater_equal | ✅ |
68+
| less | ✅ |
69+
| less_equal | ✅ |
70+
| not_equal | ✅ |
71+
| equal | ✅ |
72+
| logical_and | |
73+
| logical_or | |
74+
| logical_xor | |
75+
| logical_not | |
76+
| maximum | ✅ |
77+
| minimum | ✅ |
78+
| fmax | |
79+
| fmin | |
80+
| isfinite | |
81+
| isinf | |
82+
| isnan | |
83+
| isnat | |
84+
| signbit | |
85+
| copysign | |
86+
| nextafter | |
87+
| spacing | |
88+
| modf | |
89+
| ldexp | |
90+
| frexp | |
91+
| floor | ✅ |
92+
| ceil | ✅ |
93+
| trunc | ✅ |

0 commit comments

Comments
 (0)