@@ -64,9 +64,9 @@ class ET_EXPERIMENTAL CudaBackend final
6464 // ============================================================================
6565 //
6666 // This backend supports storing GPU tensors between execute() calls to enable
67- // device-to-device (D2D) copies instead of slower host-to-device (H2D) copies.
68- // This is useful for encoder-decoder models where the encoder output is reused
69- // across many decoder iterations.
67+ // device-to-device (D2D) copies instead of slower host-to-device (H2D)
68+ // copies. This is useful for encoder-decoder models where the encoder output
69+ // is reused across many decoder iterations.
7070 //
7171 // SUPPORTED OPTIONS (via set_option):
7272 //
@@ -75,7 +75,8 @@ class ET_EXPERIMENTAL CudaBackend final
7575 // Only supports single-output methods.
7676 // Example: opts.set_option("store_output", "encoder_output");
7777 //
78- // "use_stored_input" (string): For inputs matching the stored tensor's size,
78+ // "use_stored_input" (string): For inputs matching the stored tensor's
79+ // size,
7980 // use D2D copy from the stored tensor instead of H2D copy from CPU.
8081 // This setting persists across execute() calls until reset.
8182 // Example: opts.set_option("use_stored_input", "encoder_output");
@@ -401,7 +402,7 @@ class ET_EXPERIMENTAL CudaBackend final
401402
402403 // Process input tensors: ExecuTorch provides CPU tensors, create GPU
403404 // copies. For stored inputs, use GPU-to-GPU copy instead of CPU-to-GPU.
404- for (int i = 0 ; i < n_inputs; i++) {
405+ for (size_t i = 0 ; i < n_inputs; i++) {
405406 // Get tensor dimensions and properties from ExecuTorch CPU tensor
406407 auto cpu_tensor = &(args[i]->toTensor ());
407408 auto sizes = cpu_tensor->sizes ();
@@ -478,7 +479,7 @@ class ET_EXPERIMENTAL CudaBackend final
478479 }
479480 // Process output tensors: create GPU counterparts for ExecuTorch CPU
480481 // tensors
481- for (int i = 0 ; i < n_outputs; i++) {
482+ for (size_t i = 0 ; i < n_outputs; i++) {
482483 // Get output tensor dimensions from ExecuTorch CPU tensor
483484 auto cpu_output_tensor = &(args[i + n_inputs]->toTensor ());
484485 auto sizes = cpu_output_tensor->sizes ();
@@ -557,7 +558,7 @@ class ET_EXPERIMENTAL CudaBackend final
557558 }
558559
559560 // Copy GPU output results back to CPU output tensors
560- for (int i = 0 ; i < n_outputs; i++) {
561+ for (size_t i = 0 ; i < n_outputs; i++) {
561562 auto cpu_output_tensor = &(args[i + n_inputs]->toTensor ());
562563 // For DYNAMIC_BOUND tensors we try to resize
563564 ET_CHECK_OK_OR_RETURN_ERROR (
0 commit comments