
Commit 842be23

Merge branch 'main' into multimodal-eval-2

2 parents: 51b0e83 + 4d8bab5

4 files changed: +53, -17 lines

README.md (5 additions, 3 deletions)

@@ -582,11 +582,13 @@ We provide
 ## Community Contributions

-We really value our community and the contributions made by our wonderful users. We'll use this section to call out some of these contributions! If you'd like to help out as well, please see the [CONTRIBUTING](CONTRIBUTING.md) guide.
+We really value our community and the contributions made by our wonderful users!

-To connect with us and other community members, we invite you to join our Slack community by filling out this [form](https://docs.google.com/forms/d/e/1FAIpQLSeADnUNW36fjKjYzyHDOzEB_abKQE9b6gqqW9NXse6O0MWh0A/viewform). Once you've joined, you can:
+If you'd like to help out, connect with us and other community members by joining our [Discord](https://discord.gg/hm2Keduk3v). Once you've joined, you can:

 * Head to the `#torchchat-general` channel for general questions, discussion, and community support.
-* Join the `#torchchat-contributors` channel if you're interested in contributing directly to project development.
+* Hop in the `#torchchat-contributors` channel if you're interested in contributing directly to project development.
+
+Also give our [CONTRIBUTING](CONTRIBUTING.md) guide a read.

 Looking forward to discussing with you about torchchat future!

runner/run.cpp (23 additions, 1 deletion)

@@ -53,6 +53,9 @@ using executorch::extension::TensorPtr;
 using torch::executor::EValue;
 using torch::executor::Module;
 using torch::executor::Result;
+using executorch::runtime::MemoryManager;
+using executorch::runtime::MemoryAllocator;
+using executorch::runtime::Error;
 #endif

 using tokenizers::SPTokenizer;

@@ -867,7 +870,26 @@ int main(int argc, char *argv[]) {
       : torch::Device(torch::kCUDA);
   ModelType model_type = get_model_type(std::stoi(aoti_metadata["tokenizer_type"]));
 #else // __ET_MODEL__
-  ModelType model_type = get_model_type(llama_ver);
+  Error load_status = transformer.runner->load();
+  ET_CHECK_MSG(
+      load_status == torch::executor::Error::Ok,
+      "program::load() failed with status 0x%" PRIx32,
+      static_cast<uint32_t>(load_status));
+
+  static std::array<uint8_t, 4 * 1024U * 1024U> method_allocator_pool; // 4MB
+  MemoryAllocator method_allocator{MemoryAllocator(
+      sizeof(method_allocator_pool), method_allocator_pool.data())};
+  MemoryManager memory_manager(&method_allocator, nullptr);
+  auto tokenizer_method = transformer.runner->program()->load_method("tokenizer_type", &memory_manager);
+
+  Error execute_status = tokenizer_method->execute();
+  ET_CHECK_MSG(
+      execute_status == torch::executor::Error::Ok,
+      "method::execute() failed with status 0x%" PRIx32,
+      static_cast<uint32_t>(execute_status));
+
+  auto tokenizer_type = tokenizer_method->get_output(0).toInt();
+  ModelType model_type = get_model_type(tokenizer_type);
 #endif

   if (model_type == UNKNOWN_MODEL) {
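As a quick sanity check that the metadata really made it into the exported program, the same "tokenizer_type" method can also be executed from Python. This is a minimal sketch, assuming the `_load_for_executorch` pybinding shipped with the ExecuTorch pip package behaves as in ExecuTorch's own examples, and that `model.pte` (an illustrative path) was produced with the torchchat/export.py change below:

# Sketch only: _load_for_executorch and run_method are ExecuTorch pybindings;
# the method name matches the constant method registered at export time.
from executorch.extension.pybindings.portable_lib import _load_for_executorch

module = _load_for_executorch("model.pte")  # illustrative path
(tokenizer_type,) = module.run_method("tokenizer_type", ())  # method takes no inputs
print(tokenizer_type)  # expect 2 (llama2) or 3 (llama3)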

torchchat/export.py (24 additions, 12 deletions)

@@ -313,7 +313,7 @@ def export_to_edge(
         core_aten_ep, edge_constant_methods, edge_compile_config, verbose=verbose
     )

-def export_for_et(model, device, output_path) -> str:
+def export_for_et(model, device, output_path, edge_constant_methods) -> str:

     input = (
         torch.tensor([[1]], dtype=torch.long, device=device),

@@ -344,12 +344,15 @@ def export_for_et(model, device, output_path) -> str:
     with torch.nn.attention.sdpa_kernel(
         [torch.nn.attention.SDPBackend.MATH]
     ), torch.no_grad():
-        m = export_for_training(model, input, dynamic_shapes=dynamic_shapes).module()
+        m = export_for_training(
+            model, input, dynamic_shapes=dynamic_shapes
+        ).module()

         edge_manager = export_to_edge(
             m,
             input,
             dynamic_shapes=dynamic_shapes,
+            edge_constant_methods=edge_constant_methods,
             edge_compile_config=edge_config,
         )
         edge_manager = edge_manager.to_backend(XnnpackDynamicallyQuantizedPartitioner())

@@ -365,6 +368,7 @@ def export_for_et(model, device, output_path) -> str:
     )

     print("The methods are: ", export_program.methods)
+    print("The config methods are: ", export_program.config_methods)
     with open(output_path, "wb") as f:
         export_program.write_to_file(f)

@@ -407,7 +411,9 @@ def main(args):
            f"Warning! ExecuTorch export target is controlled by export recipe, not device setting. Ignoring device={builder_args.device} setting."
        )
        builder_args.device = "cpu"
-    elif (output_pte_path or output_dso_path or output_aoti_package_path) and "mps" in builder_args.device:
+    elif (
+        output_pte_path or output_dso_path or output_aoti_package_path
+    ) and "mps" in builder_args.device:
        print("Warning! Device MPS not supported for export. Exporting for device CPU.")
        builder_args.device = "cpu"

@@ -473,13 +479,26 @@ def main(args):
             support_tensor_subclass=False,
         )
         _unset_gguf_kwargs(builder_args)

+    if tokenizer_args is None:
+        tokenizer_type = "0"
+    elif tokenizer_args.is_sentencepiece:
+        tokenizer_type = "2"  # Corresponding to llama2
+    else:
+        tokenizer_type = "3"  # Corresponding to llama3
+
     with torch.no_grad():
         if output_pte_path:
             output_pte_path = str(os.path.abspath(output_pte_path))
             if executorch_export_available:
                 print(f"Exporting model using ExecuTorch to {output_pte_path}")
-                export_for_et(model_to_pte, builder_args.device, args.output_pte_path)
+                print(f"Tokenizer type is {tokenizer_type}")
+                export_for_et(
+                    model_to_pte,
+                    builder_args.device,
+                    args.output_pte_path,
+                    {"tokenizer_type": int(tokenizer_type)},
+                )
             else:
                 print(
                     "Export with executorch requested but ExecuTorch could not be loaded"

@@ -503,13 +522,6 @@ def main(args):
         if output_aoti_package_path:
             output_aoti_package_path = str(os.path.abspath(output_aoti_package_path))

-            if tokenizer_args is None:
-                tokenizer_type = "0"
-            elif tokenizer_args.is_sentencepiece:
-                tokenizer_type = "2"  # Corresponding to llama2
-            else:
-                tokenizer_type = "3"  # Corresponding to llama3
-
             metadata = {"tokenizer_type": tokenizer_type}
             print(
                 "Exporting model using AOT Inductor to " f"{output_aoti_package_path}."
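Read on its own, the export-side change boils down to a small mapping plus a constant-methods dict. The sketch below isolates that logic; `TokenizerArgs` is a hypothetical stand-in for torchchat's tokenizer args object (only the `is_sentencepiece` flag consulted by the diff is modeled), while the mapping values are taken verbatim from the change:

from dataclasses import dataclass
from typing import Optional

@dataclass
class TokenizerArgs:
    # Hypothetical stand-in for torchchat's tokenizer args; only the
    # is_sentencepiece flag used by the diff is modeled here.
    is_sentencepiece: bool

def pick_tokenizer_type(tokenizer_args: Optional[TokenizerArgs]) -> str:
    # Same mapping as the diff: "0" unknown, "2" llama2, "3" llama3.
    if tokenizer_args is None:
        return "0"
    if tokenizer_args.is_sentencepiece:
        return "2"  # Corresponding to llama2
    return "3"  # Corresponding to llama3

# The exporter converts the string to an int and hands it to export_for_et(),
# which bakes it into the .pte as a constant method named "tokenizer_type".
edge_constant_methods = {"tokenizer_type": int(pick_tokenizer_type(TokenizerArgs(True)))}
print(edge_constant_methods)  # {'tokenizer_type': 2}

The run.cpp hunk above is the consumer of this dict: the runner executes the baked-in method instead of taking the old `-l <llama version>` flag, which is also why the usage hint in build_native.sh below drops that flag.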

torchchat/utils/scripts/build_native.sh (1 addition, 1 deletion)

@@ -100,4 +100,4 @@ else
 fi
 cmake --build ./cmake-out --target "${TARGET}"_run

-printf "Build finished. Please run: \n./cmake-out/${TARGET}_run model.<pte|so> -z tokenizer.model -l <llama version (2 or 3)> -i <prompt>\n"
+printf "Build finished. Please run: \n./cmake-out/${TARGET}_run model.<pte|so> -z tokenizer.model -i <prompt>\n"
