Update on "add module level benchmark for gemma3 model"

Gasoonjia · Gasoonjia · commit c1167f83a27a · 2025-10-20T21:43:03.000-07:00
This diff adds a module-level benchmark for the Gemma3 model. It also introduces multimodal_benchmark.cpp, which replaces the original voxtral_runner.cpp, for benchmarking both the Gemma3 and Voxtral models at the module level. Differential Revision: [D84958564](https://our.internmc.facebook.com/intern/diff/D84958564/) [ghstack-poisoned]
diff --git a/backends/cuda/runtime/shims/memory.cpp b/backends/cuda/runtime/shims/memory.cpp
@@ -234,10 +234,9 @@ AOTITorchError aoti_torch_empty_strided(
     }
     // For each dimension, add stride[i] * (size[i] - 1)
     // This gives us the maximum offset in that dimension
-    int64_t stride_i = (strides_ptr != nullptr) ? strides_ptr[i] : 0;
+    int64_t stride_i = (strides_ptr != nullptr) ? strides_ptr[i] : 1;
     if (strides_ptr == nullptr) {
       // Calculate contiguous stride if not provided
-      stride_i = 1;
       for (int64_t j = i + 1; j < ndim; j++) {
         stride_i *= sizes_ptr[j];
       }

Original file line number	Diff line number	Diff line change
`@@ -234,10 +234,9 @@ AOTITorchError aoti_torch_empty_strided(`
`234`	`234`	`}`
`235`	`235`	`// For each dimension, add stride[i] * (size[i] - 1)`
`236`	`236`	`// This gives us the maximum offset in that dimension`
`237`		`- int64_t stride_i = (strides_ptr != nullptr) ? strides_ptr[i] : 0;`
	`237`	`+ int64_t stride_i = (strides_ptr != nullptr) ? strides_ptr[i] : 1;`
`238`	`238`	`if (strides_ptr == nullptr) {`
`239`	`239`	`// Calculate contiguous stride if not provided`
`240`		`- stride_i = 1;`
`241`	`240`	`for (int64_t j = i + 1; j < ndim; j++) {`
`242`	`241`	`stride_i *= sizes_ptr[j];`
`243`	`242`	`}`