@@ -21,14 +21,14 @@ def make_declare(loop_orders, dtypes, sub, compute_stride_jump=True):
21
21
# the number of elements in that dimension,
22
22
# the stride in that dimension,
23
23
# and the jump from an iteration to the next
24
- decl += f"npy_intp { var } _n{ value } ;\n ssize_t { var } _stride{ value } ;\n "
24
+ decl += f"npy_intp { var } _n{ value } ;\n npy_intp { var } _stride{ value } ;\n "
25
25
if compute_stride_jump :
26
- decl += f"int { var } _jump{ value } _{ j } ;\n "
26
+ decl += f"npy_intp { var } _jump{ value } _{ j } ;\n "
27
27
28
28
elif compute_stride_jump :
29
29
# if the dimension is broadcasted, we only need
30
30
# the jump (arbitrary length and stride = 0)
31
- decl += f"int { var } _jump{ value } _{ j } ;\n "
31
+ decl += f"npy_intp { var } _jump{ value } _{ j } ;\n "
32
32
33
33
return decl
34
34
@@ -257,7 +257,7 @@ def loop_over(preloop, code, indices, i):
257
257
forloop = f"""#pragma omp parallel for if( { suitable_n } >={ openmp_elemwise_minsize } )\n """
258
258
else :
259
259
forloop = ""
260
- forloop += f"""for (int { iterv } = 0; { iterv } <{ suitable_n } ; { iterv } ++)"""
260
+ forloop += f"""for (npy_intp { iterv } = 0; { iterv } <{ suitable_n } ; { iterv } ++)"""
261
261
return f"""
262
262
{ preloop }
263
263
{ forloop } {{
@@ -317,8 +317,8 @@ def make_reordered_loop(
317
317
# The first element of each pair is the absolute value of the stride
318
318
# The second element corresponds to the index in the initial loop order
319
319
order_loops = f"""
320
- std::vector< std::pair<int, int > > { ovar } _loops({ nnested } );
321
- std::vector< std::pair<int, int > >::iterator { ovar } _loops_it = { ovar } _loops.begin();
320
+ std::vector< std::pair<int, npy_intp > > { ovar } _loops({ nnested } );
321
+ std::vector< std::pair<int, npy_intp > >::iterator { ovar } _loops_it = { ovar } _loops.begin();
322
322
"""
323
323
324
324
# Fill the loop vector with the appropriate <stride, index> pairs
@@ -347,7 +347,7 @@ def make_reordered_loop(
347
347
"""
348
348
349
349
# Get the (sorted) total number of iterations of each loop
350
- declare_totals = f"int init_totals[{ nnested } ];\n "
350
+ declare_totals = f"npy_intp init_totals[{ nnested } ];\n "
351
351
declare_totals += compute_output_dims_lengths ("init_totals" , init_loop_orders , sub )
352
352
353
353
# Sort totals to match the new order that was computed by sorting
@@ -358,7 +358,7 @@ def make_reordered_loop(
358
358
359
359
for i in range (nnested ):
360
360
declare_totals += f"""
361
- int TOTAL_{ i } = init_totals[{ ovar } _loops_it->second];
361
+ npy_intp TOTAL_{ i } = init_totals[{ ovar } _loops_it->second];
362
362
++{ ovar } _loops_it;
363
363
"""
364
364
@@ -389,14 +389,14 @@ def get_loop_strides(loop_order, i):
389
389
)
390
390
391
391
declare_strides = f"""
392
- int init_strides[{ nvars } ][{ nnested } ] = {{
392
+ npy_intp init_strides[{ nvars } ][{ nnested } ] = {{
393
393
{ strides }
394
394
}};"""
395
395
396
396
# Declare the (sorted) strides for each variable;
397
397
# we iterate from innermost loop to outermost loop
398
398
declare_strides += f"""
399
- std::vector< std::pair<int, int > >::reverse_iterator { ovar } _loops_rit;
399
+ std::vector< std::pair<int, npy_intp > >::reverse_iterator { ovar } _loops_rit;
400
400
"""
401
401
402
402
for i in range (nvars ):
@@ -405,7 +405,7 @@ def get_loop_strides(loop_order, i):
405
405
{ ovar } _loops_rit = { ovar } _loops.rbegin();"""
406
406
for j in reversed (range (nnested )):
407
407
declare_strides += f"""
408
- int { var } _stride_l{ j } = init_strides[{ i } ][{ ovar } _loops_rit->second];
408
+ npy_intp { var } _stride_l{ j } = init_strides[{ i } ][{ ovar } _loops_rit->second];
409
409
++{ ovar } _loops_rit;
410
410
"""
411
411
@@ -436,7 +436,7 @@ def get_loop_strides(loop_order, i):
436
436
if openmp :
437
437
openmp_elemwise_minsize = config .openmp_elemwise_minsize
438
438
forloop += f"""#pragma omp parallel for if( { total } >={ openmp_elemwise_minsize } )\n """
439
- forloop += f"for(int { iterv } = 0; { iterv } <{ total } ; { iterv } ++)"
439
+ forloop += f"for(npy_intp { iterv } = 0; { iterv } <{ total } ; { iterv } ++)"
440
440
441
441
loop = f"""
442
442
{ forloop }
@@ -596,14 +596,14 @@ def make_reordered_loop_careduce(
596
596
if (PyArray_SIZE(inp) == 0) {
597
597
acc_iter = (npy_float64*)(PyArray_DATA(acc));
598
598
npy_intp n = PyArray_SIZE(acc);
599
- for(int i = 0; i < n; i++)
599
+ for(npy_intp i = 0; i < n; i++)
600
600
{
601
601
npy_float64 &acc_i = acc_iter[i];
602
602
acc_i = 0;
603
603
}
604
604
} else {
605
- std::vector< std::pair<int, int > > loops(2);
606
- std::vector< std::pair<int, int > >::iterator loops_it = loops.begin();
605
+ std::vector< std::pair<int, npy_intp > > loops(2);
606
+ std::vector< std::pair<int, npy_intp > >::iterator loops_it = loops.begin();
607
607
608
608
loops_it->first = abs(PyArray_STRIDES(inp)[0]);
609
609
loops_it->second = 0;
@@ -613,28 +613,28 @@ def make_reordered_loop_careduce(
613
613
++loops_it;
614
614
std::sort(loops.rbegin(), loops.rend());
615
615
616
- int dim_lengths[2] = {inp_n0, inp_n1};
617
- int inp_strides[2] = {inp_stride0, inp_stride1};
618
- int acc_strides[2] = {acc_stride0, 0};
616
+ npy_intp dim_lengths[2] = {inp_n0, inp_n1};
617
+ npy_intp inp_strides[2] = {inp_stride0, inp_stride1};
618
+ npy_intp acc_strides[2] = {acc_stride0, 0};
619
619
bool reduction_axes[2] = {0, 1};
620
620
621
621
loops_it = loops.begin();
622
- int dim_length_0 = dim_lengths[loops_it->second];
623
- int is_reduction_axis_0 = reduction_axes[loops_it->second];
624
- int inp_stride_0 = inp_strides[loops_it->second];
625
- int acc_stride_0 = acc_strides[loops_it->second];
622
+ npy_intp dim_length_0 = dim_lengths[loops_it->second];
623
+ bool is_reduction_axis_0 = reduction_axes[loops_it->second];
624
+ npy_intp inp_stride_0 = inp_strides[loops_it->second];
625
+ npy_intp acc_stride_0 = acc_strides[loops_it->second];
626
626
++loops_it;
627
- int dim_length_1 = dim_lengths[loops_it->second];
628
- int is_reduction_axis_1 = reduction_axes[loops_it->second];
629
- int inp_stride_1 = inp_strides[loops_it->second];
630
- int acc_stride_1 = acc_strides[loops_it->second];
627
+ npy_intp dim_length_1 = dim_lengths[loops_it->second];
628
+ bool is_reduction_axis_1 = reduction_axes[loops_it->second];
629
+ npy_intp inp_stride_1 = inp_strides[loops_it->second];
630
+ npy_intp acc_stride_1 = acc_strides[loops_it->second];
631
631
++loops_it;
632
632
633
633
inp_iter = (npy_float64*)(PyArray_DATA(inp));
634
634
acc_iter = (npy_float64*)(PyArray_DATA(acc));
635
635
636
- for(int iter_0 = 0; iter_0<dim_length_0; iter_0++){
637
- for(int iter_1 = 0; iter_1<dim_length_1; iter_1++){
636
+ for(npy_intp iter_0 = 0; iter_0<dim_length_0; iter_0++){
637
+ for(npy_intp iter_1 = 0; iter_1<dim_length_1; iter_1++){
638
638
npy_float64 &inp_i = *(inp_iter + inp_stride_1*iter_1 + inp_stride_0*iter_0);
639
639
npy_float64 &acc_i = *(acc_iter + acc_stride_1*iter_1 + acc_stride_0*iter_0);
640
640
@@ -654,8 +654,8 @@ def make_reordered_loop_careduce(
654
654
// Special case for empty inputs
655
655
if (PyArray_SIZE({ inp_var } ) == 0) {{
656
656
{ acc_var } _iter = ({ acc_dtype } *)(PyArray_DATA({ acc_var } ));
657
- int n = PyArray_SIZE({ acc_var } );
658
- for(int i = 0; i < n; i++)
657
+ npy_intp n = PyArray_SIZE({ acc_var } );
658
+ for(npy_intp i = 0; i < n; i++)
659
659
{{
660
660
{ acc_dtype } &{ acc_var } _i = { acc_var } _iter[i];
661
661
{ initial_value }
@@ -669,8 +669,8 @@ def make_reordered_loop_careduce(
669
669
# The second element corresponds to the index in the initial loop order
670
670
order_loops = dedent (
671
671
f"""
672
- std::vector< std::pair<int, int > > loops({ inp_ndim } );
673
- std::vector< std::pair<int, int > >::iterator loops_it = loops.begin();
672
+ std::vector< std::pair<int, npy_intp > > loops({ inp_ndim } );
673
+ std::vector< std::pair<int, npy_intp > >::iterator loops_it = loops.begin();
674
674
"""
675
675
)
676
676
@@ -691,9 +691,9 @@ def make_reordered_loop_careduce(
691
691
counter = iter (range (inp_ndim ))
692
692
unsorted_vars = dedent (
693
693
f"""
694
- int dim_lengths[{ inp_ndim } ] = {{{ ',' .join (f'{ inp_var } _n{ i } ' for i in range (inp_ndim ))} }};
695
- int inp_strides[{ inp_ndim } ] = {{{ ',' .join (f'{ inp_var } _stride{ i } ' for i in range (inp_ndim ))} }};
696
- int acc_strides[{ inp_ndim } ] = {{{ ',' .join ("0" if i in reduction_axes else f'{ acc_var } _stride{ next (counter )} ' for i in range (inp_ndim ))} }};
694
+ npy_intp dim_lengths[{ inp_ndim } ] = {{{ ',' .join (f'{ inp_var } _n{ i } ' for i in range (inp_ndim ))} }};
695
+ npy_intp inp_strides[{ inp_ndim } ] = {{{ ',' .join (f'{ inp_var } _stride{ i } ' for i in range (inp_ndim ))} }};
696
+ npy_intp acc_strides[{ inp_ndim } ] = {{{ ',' .join ("0" if i in reduction_axes else f'{ acc_var } _stride{ next (counter )} ' for i in range (inp_ndim ))} }};
697
697
bool reduction_axes[{ inp_ndim } ] = {{{ ', ' .join ("1" if i in reduction_axes else "0" for i in range (inp_ndim ))} }};\n
698
698
"""
699
699
)
@@ -702,10 +702,10 @@ def make_reordered_loop_careduce(
702
702
for i in range (inp_ndim ):
703
703
sorted_vars += dedent (
704
704
f"""
705
- int dim_length_{ i } = dim_lengths[loops_it->second];
706
- int is_reduction_axis_{ i } = reduction_axes[loops_it->second];
707
- int { inp_var } _stride_{ i } = inp_strides[loops_it->second];
708
- int { acc_var } _stride_{ i } = acc_strides[loops_it->second];
705
+ npy_intp dim_length_{ i } = dim_lengths[loops_it->second];
706
+ bool is_reduction_axis_{ i } = reduction_axes[loops_it->second];
707
+ npy_intp { inp_var } _stride_{ i } = inp_strides[loops_it->second];
708
+ npy_intp { acc_var } _stride_{ i } = acc_strides[loops_it->second];
709
709
++loops_it;
710
710
"""
711
711
)
@@ -748,7 +748,7 @@ def make_reordered_loop_careduce(
748
748
dim_length = f"dim_length_{ i } "
749
749
loop = dedent (
750
750
f"""
751
- for(int { iter_var } = 0; { iter_var } <{ dim_length } ; { iter_var } ++){{
751
+ for(npy_intp { iter_var } = 0; { iter_var } <{ dim_length } ; { iter_var } ++){{
752
752
{ loop }
753
753
}}
754
754
"""
0 commit comments