Update base for Update on "gemma3 e2e runner on cuda"

Gasoonjia · Gasoonjia · commit 7310d249398a · 2025-10-20T21:43:04.000-07:00
This diff introduces an e2e runner for the gemma3 model on CUDA, delegating via the AOTI library; the runner is guarded by CI. It also includes other necessary infrastructure updates for building and running the `gemma3 e2e runner` on CUDA devices. Differential Revision: [D85087532](https://our.internmc.facebook.com/intern/diff/D85087532/) [ghstack-poisoned]
diff --git a/backends/cuda/runtime/shims/memory.cpp b/backends/cuda/runtime/shims/memory.cpp
@@ -234,10 +234,9 @@ AOTITorchError aoti_torch_empty_strided(
     }
     // For each dimension, add stride[i] * (size[i] - 1)
     // This gives us the maximum offset in that dimension
-    int64_t stride_i = (strides_ptr != nullptr) ? strides_ptr[i] : 0;
+    int64_t stride_i = (strides_ptr != nullptr) ? strides_ptr[i] : 1;
     if (strides_ptr == nullptr) {
       // Calculate contiguous stride if not provided
-      stride_i = 1;
       for (int64_t j = i + 1; j < ndim; j++) {
         stride_i *= sizes_ptr[j];
       }

Original file line number	Diff line number	Diff line change
`@@ -234,10 +234,9 @@ AOTITorchError aoti_torch_empty_strided(`
`234`	`234`	`}`
`235`	`235`	`// For each dimension, add stride[i] * (size[i] - 1)`
`236`	`236`	`// This gives us the maximum offset in that dimension`
`237`		`- int64_t stride_i = (strides_ptr != nullptr) ? strides_ptr[i] : 0;`
	`237`	`+ int64_t stride_i = (strides_ptr != nullptr) ? strides_ptr[i] : 1;`
`238`	`238`	`if (strides_ptr == nullptr) {`
`239`	`239`	`// Calculate contiguous stride if not provided`
`240`		`- stride_i = 1;`
`241`	`240`	`for (int64_t j = i + 1; j < ndim; j++) {`
`242`	`241`	`stride_i *= sizes_ptr[j];`
`243`	`242`	`}`