|
67 | 67 |
|
68 | 68 | #define MKL_INT_MAX ((npy_intp) ((~((MKL_UINT) 0)) >> 1)) |
69 | 69 |
|
70 | | -#define CHUNKED_VML_CALL2(vml_func, n, type, in1, op1) \ |
71 | | - do { \ |
72 | | - npy_intp _n_ = (n); \ |
73 | | - const npy_intp _chunk_size = MKL_INT_MAX; \ |
74 | | - type *in1p = (type *) (in1); \ |
75 | | - type *op1p = (type *) (op1); \ |
76 | | - while (_n_ > _chunk_size) { \ |
77 | | - npy_intp _current_chunk = (_n_ > _chunk_size) ? _chunk_size : _n_; \ |
78 | | - vml_func((MKL_INT) _current_chunk, in1p, op1p); \ |
79 | | - _n_ -= _current_chunk; \ |
80 | | - in1p += _current_chunk; \ |
81 | | - op1p += _current_chunk; \ |
82 | | - } \ |
83 | | - if (_n_) { \ |
84 | | - vml_func((MKL_INT) _n_, in1p, op1p); \ |
85 | | - } \ |
| 70 | +#define CHUNKED_VML_CALL2(vml_func, n, type, in1, op1) \ |
| 71 | + do { \ |
| 72 | + npy_intp _n_ = (n); \ |
| 73 | + const npy_intp _chunk_size = MKL_INT_MAX; \ |
| 74 | + type *in1p = (type *) (in1); \ |
| 75 | + type *op1p = (type *) (op1); \ |
| 76 | + while (_n_ > 0) { \ |
| 77 | + npy_intp _current_chunk = (_n_ > _chunk_size) ? _chunk_size : _n_; \ |
| 78 | + vml_func((MKL_INT) _current_chunk, in1p, op1p); \ |
| 79 | + _n_ -= _current_chunk; \ |
| 80 | + in1p += _current_chunk; \ |
| 81 | + op1p += _current_chunk; \ |
| 82 | + } \ |
86 | 83 | } while (0) |
87 | 84 |
|
88 | | -#define CHUNKED_VML_CALL3(vml_func, n, type, in1, in2, op1) \ |
89 | | - do { \ |
90 | | - npy_intp _n_ = (n); \ |
91 | | - const npy_intp _chunk_size = MKL_INT_MAX; \ |
92 | | - type *in1p = (type *) (in1); \ |
93 | | - type *in2p = (type *) (in2); \ |
94 | | - type *op1p = (type *) (op1); \ |
95 | | - while (_n_ > _chunk_size) { \ |
96 | | - npy_intp _current_chunk = (_n_ > _chunk_size) ? _chunk_size : _n_; \ |
97 | | - vml_func((MKL_INT) _current_chunk, in1p, in2p, op1p); \ |
98 | | - _n_ -= _current_chunk; \ |
99 | | - in1p += _current_chunk; \ |
100 | | - in2p += _current_chunk; \ |
101 | | - op1p += _current_chunk; \ |
102 | | - } \ |
103 | | - if (_n_) { \ |
104 | | - vml_func((MKL_INT)_n_, in1p, in2p, op1p); \ |
105 | | - } \ |
| 85 | +#define CHUNKED_VML_CALL3(vml_func, n, type, in1, in2, op1) \ |
| 86 | + do { \ |
| 87 | + npy_intp _n_ = (n); \ |
| 88 | + const npy_intp _chunk_size = MKL_INT_MAX; \ |
| 89 | + type *in1p = (type *) (in1); \ |
| 90 | + type *in2p = (type *) (in2); \ |
| 91 | + type *op1p = (type *) (op1); \ |
| 92 | + while (_n_ > 0) { \ |
| 93 | + npy_intp _current_chunk = (_n_ > _chunk_size) ? _chunk_size : _n_; \ |
| 94 | + vml_func((MKL_INT) _current_chunk, in1p, in2p, op1p); \ |
| 95 | + _n_ -= _current_chunk; \ |
| 96 | + in1p += _current_chunk; \ |
| 97 | + in2p += _current_chunk; \ |
| 98 | + op1p += _current_chunk; \ |
| 99 | + } \ |
106 | 100 | } while(0) |
107 | 101 |
|
108 | 102 |
|
|
116 | 110 | const type _shiftA = (shiftA); \ |
117 | 111 | const type _scaleB = (scaleB); \ |
118 | 112 | const type _shiftB = (shiftB); \ |
119 | | - while (_n_ > _chunk_size) { \ |
120 | | - npy_intp _current_chunk = (_n_ > _chunk_size) ? _chunk_size : _n_; \ |
121 | | - vml_func(_current_chunk, in1p, in1p, _scaleA, _shiftA, _scaleB, _shiftB, op1p); \ |
122 | | - _n_ -= _current_chunk; \ |
123 | | - in1p += _current_chunk; \ |
124 | | - op1p += _current_chunk; \ |
125 | | - } \ |
126 | | - if (_n_) { \ |
127 | | - vml_func((MKL_INT)_n_, in1p, in1p, _scaleA, _shiftA, _scaleB, _shiftB, op1p); \ |
| 113 | + while (_n_ > 0) { \ |
| 114 | + npy_intp _current_chunk = (_n_ > _chunk_size) ? _chunk_size : _n_; \ |
| 115 | + vml_func(_current_chunk, in1p, in1p, _scaleA, _shiftA, _scaleB, _shiftB, op1p); \ |
| 116 | + _n_ -= _current_chunk; \ |
| 117 | + in1p += _current_chunk; \ |
| 118 | + op1p += _current_chunk; \ |
128 | 119 | } \ |
129 | 120 | } while(0) |
130 | 121 |
|
|
0 commit comments