pytorch
diff --git a/kernels/portable/cpu/op_any.cpp b/kernels/portable/cpu/op_any.cpp
Lines changed: 5 additions & 4 deletions
diff --git a/kernels/portable/cpu/op_cdist_forward.cpp b/kernels/portable/cpu/op_cdist_forward.cpp
Lines changed: 6 additions & 6 deletions
diff --git a/kernels/portable/cpu/op_constant_pad_nd.cpp b/kernels/portable/cpu/op_constant_pad_nd.cpp
Lines changed: 5 additions & 4 deletions
diff --git a/kernels/portable/cpu/op_convolution.cpp b/kernels/portable/cpu/op_convolution.cpp
Lines changed: 18 additions & 17 deletions
diff --git a/kernels/portable/cpu/op_diagonal_copy.cpp b/kernels/portable/cpu/op_diagonal_copy.cpp
Lines changed: 6 additions & 4 deletions
diff --git a/kernels/portable/cpu/op_flip.cpp b/kernels/portable/cpu/op_flip.cpp
Lines changed: 5 additions & 4 deletions
diff --git a/kernels/portable/cpu/op_full.cpp b/kernels/portable/cpu/op_full.cpp
Lines changed: 2 additions & 1 deletion
diff --git a/kernels/portable/cpu/op_full_like.cpp b/kernels/portable/cpu/op_full_like.cpp
Lines changed: 2 additions & 1 deletion
diff --git a/kernels/portable/cpu/op_gather.cpp b/kernels/portable/cpu/op_gather.cpp
Lines changed: 3 additions & 2 deletions
@@ -6,6 +6,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 
+#include <c10/util/irange.h>
 #include <executorch/kernels/portable/cpu/util/reduce_util.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
 
@@ -34,7 +35,7 @@ Tensor& any_all_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
       const auto data_in = in.const_data_ptr<CTYPE_IN>();
       auto data_out = out.mutable_data_ptr<CTYPE_OUT>();
       data_out[0] = static_cast<CTYPE_OUT>(false);
-      for (auto i = 0; i < in.numel(); ++i) {
+      for (const auto i : c10::irange(in.numel())) {
         if (static_cast<bool>(data_in[i])) {
           data_out[0] = static_cast<CTYPE_OUT>(true);
           break;
@@ -83,12 +84,12 @@ Tensor& any_dims_out(
       CTYPE_OUT* out_data = out.mutable_data_ptr<CTYPE_OUT>();
       if (dim_list.has_value() && dim_list.value().empty()) {
         const CTYPE_IN* in_data = in.const_data_ptr<CTYPE_IN>();
-        for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) {
+        for (const auto out_ix : c10::irange(out.numel())) {
           out_data[out_ix] =
               static_cast<CTYPE_OUT>(static_cast<bool>(in_data[out_ix]));
         }
       } else {
-        for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) {
+        for (const auto out_ix : c10::irange(out.numel())) {
           bool any = false;
           if (in.numel() > 0) {
             any = map_reduce_over_dim_list<CTYPE_IN, bool>(
@@ -138,7 +139,7 @@ Tensor& any_out(
   ET_SWITCH_REALHBBF16_TYPES(in_type, ctx, name, CTYPE_IN, [&] {
     ET_SWITCH_TWO_TYPES(Bool, Byte, out_type, ctx, name, CTYPE_OUT, [&] {
       CTYPE_OUT* out_data = out.mutable_data_ptr<CTYPE_OUT>();
-      for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) {
+      for (const auto out_ix : c10::irange(out.numel())) {
         CTYPE_OUT any = false;
         if (in.numel() > 0) {
           std::tuple<CTYPE_OUT, long> acc =
 
@@ -6,6 +6,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 
+#include <c10/util/irange.h>
 #include <executorch/kernels/portable/cpu/util/broadcast_util.h>
 #include <executorch/kernels/portable/cpu/util/distance_util.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
@@ -34,7 +35,7 @@ void cdist(const Tensor& x1, const Tensor& x2, Tensor& out, double p) {
   // If the last dimension of x1 (which is equal to the last dimension of x2)
   // has size 0, then the output is filled with 0s.
   if (x1.numel() == 0) {
-    for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) {
+    for (const auto out_ix : c10::irange(out.numel())) {
       out_data[out_ix] = 0;
     }
     return;
@@ -64,7 +65,7 @@ void cdist(const Tensor& x1, const Tensor& x2, Tensor& out, double p) {
   size_t x2_inner_size = R * M;
   size_t out_inner_size = P * R;
 
-  for (size_t b = 0; b < out_batch_numel; ++b) {
+  for (const auto b : c10::irange(out_batch_numel)) {
     size_t x1_base_ix = b * x1_inner_size;
     size_t x2_base_ix = b * x2_inner_size;
     size_t out_base_ix = b * out_inner_size;
@@ -81,14 +82,13 @@ void cdist(const Tensor& x1, const Tensor& x2, Tensor& out, double p) {
         x2_base_ix = linearize_access_indexes(out_base_coord, out.dim(), x2);
       }
     }
-
     size_t out_ix = 0;
-    for (size_t i = 0; i < P; ++i) {
+    for (const auto i : c10::irange(P)) {
       const CTYPE* row_i = x1_data + x1_base_ix + i * M;
-      for (size_t j = 0; j < R; ++j) {
+      for (const auto j : c10::irange(R)) {
         const CTYPE* row_j = x2_data + x2_base_ix + j * M;
         CTYPE agg = 0;
-        for (size_t k = 0; k < M; ++k) {
+        for (const auto k : c10::irange(M)) {
           CTYPE diff = std::abs(row_i[k] - row_j[k]);
           agg = Norm::reduce(agg, Norm::map(diff, p));
         }
 
@@ -6,6 +6,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 
+#include <c10/util/irange.h>
 #include <cmath>
 #include <cstring>
 
@@ -56,7 +57,7 @@ void apply_padding_to_dim(
   size_t out_step_len = out_strides[dim];
   size_t in_step_len = self_strides[dim];
 
-  for (size_t i = 0; i < pad_before; ++i) {
+  for ([[maybe_unused]] const auto i : c10::irange(pad_before)) {
     set_all_to_value(out_data, out_step_len, value);
     out_data += out_step_len;
   }
@@ -75,7 +76,7 @@ void apply_padding_to_dim(
   }
   // Otherwise, call this function recursively
   else {
-    for (size_t i = 0; i < self_sizes[dim]; ++i) {
+    for ([[maybe_unused]] const auto i : c10::irange(self_sizes[dim])) {
       apply_padding_to_dim(
           ndim,
           self_data,
@@ -94,7 +95,7 @@ void apply_padding_to_dim(
     }
   }
 
-  for (int i = 0; i < pad_after; ++i) {
+  for ([[maybe_unused]] const auto i : c10::irange(pad_after)) {
     set_all_to_value(out_data, out_step_len, value);
     out_data += out_step_len;
   }
@@ -124,7 +125,7 @@ void constant_pad_nd_out_impl(
   // Collect sizes and strides of input and output tensors and determine the
   // last padded dimension
   size_t last_padded_dim = 0;
-  for (size_t i = 0; i < ndim; ++i) {
+  for (const auto i : c10::irange(ndim)) {
     self_sizes[i] = self.size(i);
     self_strides[i] = getTrailingDims(self, static_cast<int64_t>(i));
     out_sizes[i] = out.size(i);
 
@@ -6,6 +6,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 
+#include <c10/util/irange.h>
 #include <cstring>
 
 #include <executorch/kernels/portable/cpu/util/dtype_util.h>
@@ -91,25 +92,25 @@ void conv2d_impl(
   if (!transposed) {
     w_coord[0] = out_c;
     // Compute 2D output region
-    for (size_t out_y = 0; out_y < out_H; ++out_y) {
+    for (const auto out_y : c10::irange(out_H)) {
       out_coord[2] = out_y;
-      for (size_t out_x = 0; out_x < out_W; ++out_x) {
+      for (const auto out_x : c10::irange(out_W)) {
         out_coord[3] = out_x;
 
         CTYPE accum = 0.0f;
-        for (size_t in_c = in_c_start; in_c < in_c_start + in_C_per_group;
-             ++in_c) {
+        for (const auto in_c :
+             c10::irange(in_c_start, in_c_start + in_C_per_group)) {
           in_coord[1] = in_c;
           w_coord[1] = in_c - in_c_start;
 
-          for (size_t w_y = 0; w_y < w_H; ++w_y) {
+          for (const auto w_y : c10::irange(w_H)) {
             w_coord[2] = w_y;
 
             size_t in_y = stride_y * out_y + dilation_y * w_y - padding_y;
             in_coord[2] = in_y;
             // Only proceed if input y coordinate is within bounds
             if (in_y >= 0 && in_y < in_H) {
-              for (size_t w_x = 0; w_x < w_W; ++w_x) {
+              for (const auto w_x : c10::irange(w_W)) {
                 w_coord[3] = w_x;
 
                 size_t in_x = stride_x * out_x + dilation_x * w_x - padding_x;
@@ -143,29 +144,29 @@ void conv2d_impl(
   } else { // transposed convolution
     w_coord[1] = out_c - out_c_start;
 
-    for (size_t in_y = 0; in_y < in_H; ++in_y) {
+    for (const auto in_y : c10::irange(in_H)) {
       in_coord[2] = in_y;
 
-      for (size_t in_x = 0; in_x < in_W; ++in_x) {
+      for (const auto in_x : c10::irange(in_W)) {
         in_coord[3] = in_x;
 
-        for (size_t in_c = in_c_start; in_c < in_c_start + in_C_per_group;
-             ++in_c) {
+        for (const auto in_c :
+             c10::irange(in_c_start, in_c_start + in_C_per_group)) {
           in_coord[1] = in_c;
 
           size_t in_idx =
               calculate_linear_index(in_coord, in_strides.data(), 4);
           CTYPE in_val = in_ptr[in_idx];
 
           w_coord[0] = in_c;
-          for (size_t w_y = 0; w_y < w_H; ++w_y) {
+          for (const auto w_y : c10::irange(w_H)) {
             w_coord[2] = w_y;
             size_t out_y = stride_y * in_y + dilation_y * w_y - padding_y;
             out_coord[2] = out_y;
 
             // Only proceed if output y coordinate is within bounds
             if (out_y >= 0 && out_y < out_H) {
-              for (size_t w_x = 0; w_x < w_W; ++w_x) {
+              for (const auto w_x : c10::irange(w_W)) {
                 w_coord[3] = w_x;
                 size_t out_x = stride_x * in_x + dilation_x * w_x - padding_x;
                 out_coord[3] = out_x;
@@ -302,21 +303,21 @@ void convolution_wrapper(
       memset(out_ptr, 0, out.nbytes());
     } else {
       // If bias is present, we initialize the output to the bias value
-      for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) {
+      for (const auto out_ix : c10::irange(out.numel())) {
         out_ptr[out_ix] = load_bias(&bias_ptr
                                         [((out_ix / out_strides[1]) % out_C) *
                                          bias.value().element_size()]);
       }
     }
   }
 
-  for (size_t batch = 0; batch < out_N; ++batch) {
-    for (size_t group = 0; group < groups; ++group) {
+  for (const auto batch : c10::irange(out_N)) {
+    for (const auto group : c10::irange(groups)) {
       // Align channel offset based on the group
       size_t out_c_start = group * out_C_per_group;
       // Populate all the out channels in the group
-      for (size_t out_c = out_c_start; out_c < out_c_start + out_C_per_group;
-           ++out_c) {
+      for (const auto out_c :
+           c10::irange(out_c_start, out_c_start + out_C_per_group)) {
         conv2d_impl(
             in_ptr,
             in_sizes,
 
@@ -6,6 +6,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 
+#include <c10/util/irange.h>
 #include <executorch/kernels/portable/cpu/util/copy_ops_util.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
 #include <executorch/runtime/platform/assert.h>
@@ -40,20 +41,21 @@ void diagonal_copy_impl(
 
   size_t new_ndim = out.dim();
   int64_t new_sizes[kTensorDimensionLimit];
-  for (size_t i = 0; i < new_ndim; ++i) {
+  for (const auto i : c10::irange(new_ndim)) {
     new_sizes[i] = out.size(i);
   }
 
   int64_t new_strides[kTensorDimensionLimit];
   size_t shift = 0;
-  for (size_t d = 0; d < in.dim(); ++d) {
-    if (d == dim1 || d == dim2) {
+  size_t in_dim = in.dim();
+  for (const auto d : c10::irange(in_dim)) {
+    if (static_cast<int64_t>(d) == dim1 || static_cast<int64_t>(d) == dim2) {
       shift++;
     } else {
       new_strides[d - shift] = in.strides().at(d);
     }
   }
-  new_strides[in.dim() - 2] = in.strides().at(dim1) + in.strides().at(dim2);
+  new_strides[in_dim - 2] = in.strides().at(dim1) + in.strides().at(dim2);
 
   as_strided_copy<CTYPE>(
       in, {new_sizes, new_ndim}, {new_strides, new_ndim}, storage_offset, out);
 
@@ -5,6 +5,7 @@
  * This source code is licensed under the BSD-style license found in the
  * LICENSE file in the root directory of this source tree.
  */
+#include <c10/util/irange.h>
 
 #include <executorch/kernels/portable/cpu/util/reduce_util.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
@@ -25,7 +26,7 @@ size_t unflip_flat_ix(size_t ix, const Tensor& in, ArrayRef<bool> flip_dim) {
   indexToCoordinate(in, ix, ix_coord);
 
   size_t unflip_coord[kTensorDimensionLimit];
-  for (size_t d = 0; d < in.dim(); d++) {
+  for (const auto d : c10::irange(in.dim())) {
     if (flip_dim[d]) {
       unflip_coord[d] = in.size(d) - ix_coord[d] - 1;
     } else {
@@ -54,10 +55,10 @@ Tensor& flip_out(
   ET_KERNEL_CHECK(ctx, check_flip_args(in, dims, out), InvalidArgument, out);
 
   bool flip_dim_data[kTensorDimensionLimit];
-  for (size_t i = 0; i < in.dim(); i++) {
+  for (const auto i : c10::irange(in.dim())) {
     flip_dim_data[i] = false;
   }
-  for (size_t i = 0; i < dims.size(); i++) {
+  for (const auto i : c10::irange(dims.size())) {
     const auto d = dims[i] < 0 ? dims[i] + nonzero_dim(in) : dims[i];
     flip_dim_data[d] = true;
   }
@@ -70,7 +71,7 @@ Tensor& flip_out(
     const CTYPE* in_data = in.const_data_ptr<CTYPE>();
     CTYPE* out_data = out.mutable_data_ptr<CTYPE>();
 
-    for (size_t ix = 0; ix < out.numel(); ++ix) {
+    for (const auto ix : c10::irange(out.numel())) {
       out_data[ix] = in_data[unflip_flat_ix(ix, in, flip_dim)];
     }
   });
 
@@ -5,6 +5,7 @@
  * This source code is licensed under the BSD-style license found in the
  * LICENSE file in the root directory of this source tree.
  */
+#include <c10/util/irange.h>
 
 #include <executorch/kernels/portable/cpu/scalar_utils.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
@@ -44,7 +45,7 @@ Tensor& full_out(
     ET_SWITCH_REALHBBF16_TYPES(out_type, ctx, name, CTYPE_OUT, [&] {
       CTYPE_OUT val_casted = static_cast<CTYPE_OUT>(val);
       auto data_out = out.mutable_data_ptr<CTYPE_OUT>();
-      for (size_t i = 0; i < out.numel(); ++i) {
+      for (const auto i : c10::irange(out.numel())) {
         data_out[i] = val_casted;
       }
     });
 
@@ -5,6 +5,7 @@
  * This source code is licensed under the BSD-style license found in the
  * LICENSE file in the root directory of this source tree.
  */
+#include <c10/util/irange.h>
 
 #include <executorch/kernels/portable/cpu/scalar_utils.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
@@ -60,7 +61,7 @@ Tensor& full_like_out(
     ET_SWITCH_REALHBBF16_TYPES(out_type, ctx, name, CTYPE_OUT, [&] {
       CTYPE_OUT val_casted = static_cast<CTYPE_OUT>(val);
       auto data_out = out.mutable_data_ptr<CTYPE_OUT>();
-      for (size_t i = 0; i < out.numel(); ++i) {
+      for (const auto i : c10::irange(out.numel())) {
         data_out[i] = val_casted;
       }
     });
 
@@ -6,6 +6,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 
+#include <c10/util/irange.h>
 #include <cinttypes>
 #include <cstdint>
 #include <cstring>
@@ -37,12 +38,12 @@ void gather_helper(
     return;
   }
 
-  for (size_t ix = 0; ix < index.numel(); ++ix) {
+  for (const auto ix : c10::irange(index.numel())) {
     size_t ix_coord[kTensorDimensionLimit];
     indexToCoordinate(index, ix, ix_coord);
 
     size_t in_coord[kTensorDimensionLimit];
-    for (size_t i = 0; i < out.dim(); ++i) {
+    for (const auto i : c10::irange(out.dim())) {
       if (i == dim) {
         in_coord[i] = index_data[ix];
       } else {