|
23 | 23 | #include "numpy/npy_math.h" |
24 | 24 | #include "convert_datatype.h" |
25 | 25 | #include "dtypemeta.h" |
| 26 | +#include "dispatching.h" |
26 | 27 |
|
27 | 28 |
|
28 | 29 | typedef struct { |
@@ -456,6 +457,225 @@ init_casts(void) |
456 | 457 | } |
457 | 458 |
|
458 | 459 |
|
| 460 | +/* |
| 461 | + * We also wish to test very simple ufunc functionality. So create two |
| 462 | + * ufunc loops: |
| 463 | + * 1. Multiplication, which can multiply the factors and work with that. |
| 464 | + * 2. Addition, which needs to use the common instance, and runs into |
| 465 | + * cast safety subtleties since we will implement it without an additional |
| 466 | + * cast. |
| 467 | + * |
| 468 | + * NOTE: When first writing this, promotion did not exist for new-style loops. |
| 469 | + * Once it exists, we could use promotion to implement double * sfloat. |
| 470 | + */ |
| 471 | +static int |
| 472 | +multiply_sfloats(PyArrayMethod_Context *NPY_UNUSED(context), |
| 473 | + char *const data[], npy_intp const dimensions[], |
| 474 | + npy_intp const strides[], NpyAuxData *NPY_UNUSED(auxdata)) |
| 475 | +{ |
| 476 | + npy_intp N = dimensions[0]; |
| 477 | + char *in1 = data[0]; |
| 478 | + char *in2 = data[1]; |
| 479 | + char *out = data[2]; |
| 480 | + for (npy_intp i = 0; i < N; i++) { |
| 481 | + *(double *)out = *(double *)in1 * *(double *)in2; |
| 482 | + in1 += strides[0]; |
| 483 | + in2 += strides[1]; |
| 484 | + out += strides[2]; |
| 485 | + } |
| 486 | + return 0; |
| 487 | +} |
| 488 | + |
| 489 | + |
| 490 | +static NPY_CASTING |
| 491 | +multiply_sfloats_resolve_descriptors( |
| 492 | + PyArrayMethodObject *NPY_UNUSED(self), |
| 493 | + PyArray_DTypeMeta *NPY_UNUSED(dtypes[3]), |
| 494 | + PyArray_Descr *given_descrs[3], |
| 495 | + PyArray_Descr *loop_descrs[3]) |
| 496 | +{ |
| 497 | + /* |
| 498 | + * Multiply the scaling for the result. If the result was passed in we |
| 499 | + * simply ignore it and let the casting machinery fix it up here. |
| 500 | + */ |
| 501 | + double factor = ((PyArray_SFloatDescr *)given_descrs[1])->scaling; |
| 502 | + loop_descrs[2] = sfloat_scaled_copy( |
| 503 | + (PyArray_SFloatDescr *)given_descrs[0], factor); |
| 504 | + if (loop_descrs[2] == 0) { |
| 505 | + return -1; |
| 506 | + } |
| 507 | + Py_INCREF(given_descrs[0]); |
| 508 | + loop_descrs[0] = given_descrs[0]; |
| 509 | + Py_INCREF(given_descrs[1]); |
| 510 | + loop_descrs[1] = given_descrs[1]; |
| 511 | + return NPY_NO_CASTING; |
| 512 | +} |
| 513 | + |
| 514 | + |
| 515 | +/* |
| 516 | + * Unlike the multiplication implementation above, this loops deals with |
| 517 | + * scaling (casting) internally. This allows to test some different paths. |
| 518 | + */ |
| 519 | +static int |
| 520 | +add_sfloats(PyArrayMethod_Context *context, |
| 521 | + char *const data[], npy_intp const dimensions[], |
| 522 | + npy_intp const strides[], NpyAuxData *NPY_UNUSED(auxdata)) |
| 523 | +{ |
| 524 | + double fin1 = ((PyArray_SFloatDescr *)context->descriptors[0])->scaling; |
| 525 | + double fin2 = ((PyArray_SFloatDescr *)context->descriptors[1])->scaling; |
| 526 | + double fout = ((PyArray_SFloatDescr *)context->descriptors[2])->scaling; |
| 527 | + |
| 528 | + double fact1 = fin1 / fout; |
| 529 | + double fact2 = fin2 / fout; |
| 530 | + if (check_factor(fact1) < 0) { |
| 531 | + return -1; |
| 532 | + } |
| 533 | + if (check_factor(fact2) < 0) { |
| 534 | + return -1; |
| 535 | + } |
| 536 | + |
| 537 | + npy_intp N = dimensions[0]; |
| 538 | + char *in1 = data[0]; |
| 539 | + char *in2 = data[1]; |
| 540 | + char *out = data[2]; |
| 541 | + for (npy_intp i = 0; i < N; i++) { |
| 542 | + *(double *)out = (*(double *)in1 * fact1) + (*(double *)in2 * fact2); |
| 543 | + in1 += strides[0]; |
| 544 | + in2 += strides[1]; |
| 545 | + out += strides[2]; |
| 546 | + } |
| 547 | + return 0; |
| 548 | +} |
| 549 | + |
| 550 | + |
| 551 | +static NPY_CASTING |
| 552 | +add_sfloats_resolve_descriptors( |
| 553 | + PyArrayMethodObject *NPY_UNUSED(self), |
| 554 | + PyArray_DTypeMeta *NPY_UNUSED(dtypes[3]), |
| 555 | + PyArray_Descr *given_descrs[3], |
| 556 | + PyArray_Descr *loop_descrs[3]) |
| 557 | +{ |
| 558 | + /* |
| 559 | + * Here we accept an output descriptor (the inner loop can deal with it), |
| 560 | + * if none is given, we use the "common instance": |
| 561 | + */ |
| 562 | + if (given_descrs[2] == NULL) { |
| 563 | + loop_descrs[2] = sfloat_common_instance( |
| 564 | + given_descrs[0], given_descrs[1]); |
| 565 | + if (loop_descrs[2] == 0) { |
| 566 | + return -1; |
| 567 | + } |
| 568 | + } |
| 569 | + else { |
| 570 | + Py_INCREF(given_descrs[2]); |
| 571 | + loop_descrs[2] = given_descrs[2]; |
| 572 | + } |
| 573 | + Py_INCREF(given_descrs[0]); |
| 574 | + loop_descrs[0] = given_descrs[0]; |
| 575 | + Py_INCREF(given_descrs[1]); |
| 576 | + loop_descrs[1] = given_descrs[1]; |
| 577 | + |
| 578 | + /* If the factors mismatch, we do implicit casting inside the ufunc! */ |
| 579 | + double fin1 = ((PyArray_SFloatDescr *)loop_descrs[0])->scaling; |
| 580 | + double fin2 = ((PyArray_SFloatDescr *)loop_descrs[1])->scaling; |
| 581 | + double fout = ((PyArray_SFloatDescr *)loop_descrs[2])->scaling; |
| 582 | + |
| 583 | + if (fin1 == fout && fin2 == fout) { |
| 584 | + return NPY_NO_CASTING; |
| 585 | + } |
| 586 | + if (npy_fabs(fin1) == npy_fabs(fout) && npy_fabs(fin2) == npy_fabs(fout)) { |
| 587 | + return NPY_EQUIV_CASTING; |
| 588 | + } |
| 589 | + return NPY_SAME_KIND_CASTING; |
| 590 | +} |
| 591 | + |
| 592 | + |
| 593 | +static int |
| 594 | +add_loop(const char *ufunc_name, PyBoundArrayMethodObject *bmeth) |
| 595 | +{ |
| 596 | + PyObject *mod = PyImport_ImportModule("numpy"); |
| 597 | + if (mod == NULL) { |
| 598 | + return -1; |
| 599 | + } |
| 600 | + PyObject *ufunc = PyObject_GetAttrString(mod, ufunc_name); |
| 601 | + Py_DECREF(mod); |
| 602 | + if (!PyObject_TypeCheck(ufunc, &PyUFunc_Type)) { |
| 603 | + Py_DECREF(ufunc); |
| 604 | + PyErr_Format(PyExc_TypeError, |
| 605 | + "numpy.%s was not a ufunc!", ufunc_name); |
| 606 | + return -1; |
| 607 | + } |
| 608 | + PyObject *dtype_tup = PyArray_TupleFromItems( |
| 609 | + 3, (PyObject **)bmeth->dtypes, 0); |
| 610 | + if (dtype_tup == NULL) { |
| 611 | + Py_DECREF(ufunc); |
| 612 | + return -1; |
| 613 | + } |
| 614 | + PyObject *info = PyTuple_Pack(2, dtype_tup, bmeth->method); |
| 615 | + Py_DECREF(dtype_tup); |
| 616 | + if (info == NULL) { |
| 617 | + Py_DECREF(ufunc); |
| 618 | + return -1; |
| 619 | + } |
| 620 | + int res = PyUFunc_AddLoop((PyUFuncObject *)ufunc, info, 0); |
| 621 | + Py_DECREF(ufunc); |
| 622 | + Py_DECREF(info); |
| 623 | + return res; |
| 624 | +} |
| 625 | + |
| 626 | + |
| 627 | +/* |
| 628 | + * Add new ufunc loops (this is somewhat clumsy as of writing it, but should |
| 629 | + * get less so with the introduction of public API). |
| 630 | + */ |
| 631 | +static int |
| 632 | +init_ufuncs(void) { |
| 633 | + PyArray_DTypeMeta *dtypes[3] = { |
| 634 | + &PyArray_SFloatDType, &PyArray_SFloatDType, &PyArray_SFloatDType}; |
| 635 | + PyType_Slot slots[3] = {{0, NULL}}; |
| 636 | + PyArrayMethod_Spec spec = { |
| 637 | + .nin = 2, |
| 638 | + .nout =1, |
| 639 | + .dtypes = dtypes, |
| 640 | + .slots = slots, |
| 641 | + }; |
| 642 | + spec.name = "sfloat_multiply"; |
| 643 | + spec.casting = NPY_NO_CASTING; |
| 644 | + |
| 645 | + slots[0].slot = NPY_METH_resolve_descriptors; |
| 646 | + slots[0].pfunc = &multiply_sfloats_resolve_descriptors; |
| 647 | + slots[1].slot = NPY_METH_strided_loop; |
| 648 | + slots[1].pfunc = &multiply_sfloats; |
| 649 | + PyBoundArrayMethodObject *bmeth = PyArrayMethod_FromSpec_int(&spec, 0); |
| 650 | + if (bmeth == NULL) { |
| 651 | + return -1; |
| 652 | + } |
| 653 | + int res = add_loop("multiply", bmeth); |
| 654 | + Py_DECREF(bmeth); |
| 655 | + if (res < 0) { |
| 656 | + return -1; |
| 657 | + } |
| 658 | + |
| 659 | + spec.name = "sfloat_add"; |
| 660 | + spec.casting = NPY_SAME_KIND_CASTING; |
| 661 | + |
| 662 | + slots[0].slot = NPY_METH_resolve_descriptors; |
| 663 | + slots[0].pfunc = &add_sfloats_resolve_descriptors; |
| 664 | + slots[1].slot = NPY_METH_strided_loop; |
| 665 | + slots[1].pfunc = &add_sfloats; |
| 666 | + bmeth = PyArrayMethod_FromSpec_int(&spec, 0); |
| 667 | + if (bmeth == NULL) { |
| 668 | + return -1; |
| 669 | + } |
| 670 | + res = add_loop("add", bmeth); |
| 671 | + Py_DECREF(bmeth); |
| 672 | + if (res < 0) { |
| 673 | + return -1; |
| 674 | + } |
| 675 | + return 0; |
| 676 | +} |
| 677 | + |
| 678 | + |
459 | 679 | /* |
460 | 680 | * Python entry point, exported via `umathmodule.h` and `multiarraymodule.c`. |
461 | 681 | * TODO: Should be moved when the necessary API is not internal anymore. |
@@ -491,6 +711,10 @@ get_sfloat_dtype(PyObject *NPY_UNUSED(mod), PyObject *NPY_UNUSED(args)) |
491 | 711 | return NULL; |
492 | 712 | } |
493 | 713 |
|
| 714 | + if (init_ufuncs() < 0) { |
| 715 | + return NULL; |
| 716 | + } |
| 717 | + |
494 | 718 | initalized = NPY_TRUE; |
495 | 719 | return (PyObject *)&PyArray_SFloatDType; |
496 | 720 | } |
0 commit comments