Skip to content

Commit cd2f5d0

Browse files
authored
feat: bump ExecuTorch runtime to v1.0.0
## Description <!-- Provide a concise and descriptive summary of the changes implemented in this PR. --> ### Introduces a breaking change? - [ ] Yes - [x] No ### Type of change - [ ] Bug fix (change which fixes an issue) - [x] New feature (change which adds functionality) - [ ] Documentation update (improves or adds clarity to existing documentation) - [ ] Other (chores, tests, code style improvements etc.) ### Tested on - [x] iOS - [x] Android ### Testing instructions <!-- Provide step-by-step instructions on how to test your changes. Include setup details if necessary. --> ### Screenshots <!-- Add screenshots here, if applicable --> ### Related issues <!-- Link related issues here using #issue-number --> ### Checklist - [x] I have performed a self-review of my code - [x] I have commented my code, particularly in hard-to-understand areas - [ ] I have updated the documentation accordingly - [ ] My changes generate no new warnings ### Additional notes <!-- Include any additional information, assumptions, or context that reviewers might need to understand this PR. -->
1 parent 83130d3 commit cd2f5d0

File tree

124 files changed

+9424
-6053
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the searchbox below for content that may be hidden.

124 files changed

+9424
-6053
lines changed
8.58 KB
Binary file not shown.

packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <rnexecutorch/threads/GlobalThreadPool.h>
66

77
namespace rnexecutorch::models::llm {
8+
namespace llm = ::executorch::extension::llm;
89
namespace fs = std::filesystem;
910
using namespace facebook;
1011
using executorch::extension::TensorPtr;
@@ -14,8 +15,8 @@ using executorch::runtime::Error;
1415
LLM::LLM(const std::string &modelSource, const std::string &tokenizerSource,
1516
std::shared_ptr<react::CallInvoker> callInvoker)
1617
: BaseModel(modelSource, callInvoker, Module::LoadMode::File),
17-
runner(std::make_unique<example::Runner>(module_.get(), tokenizerSource,
18-
false)) {
18+
runner(
19+
std::make_unique<example::Runner>(module_.get(), tokenizerSource)) {
1920
auto loadResult = runner->load();
2021
if (loadResult != Error::Ok) {
2122
throw std::runtime_error("Failed to load LLM runner, error code: " +
@@ -24,20 +25,9 @@ LLM::LLM(const std::string &modelSource, const std::string &tokenizerSource,
2425

2526
memorySizeLowerBound = fs::file_size(fs::path(modelSource)) +
2627
fs::file_size(fs::path(tokenizerSource));
27-
28-
// Determine the input mode
29-
auto inputShapes = getAllInputShapes("forward");
30-
auto &tokensTensorShape = inputShapes[0];
31-
auto &positionsTensorShape = inputShapes[1];
32-
if (tokensTensorShape.size() != 2 || positionsTensorShape.size() != 1) {
33-
throw std::runtime_error("Unsupported LLM input format");
34-
}
35-
if (positionsTensorShape[0] != 1 &&
36-
tokensTensorShape[1] == positionsTensorShape[0]) {
37-
runner->set_extended_input_mode(true);
38-
}
3928
}
4029

30+
// TODO: add a way to manipulate the generation config with params
4131
void LLM::generate(std::string input, std::shared_ptr<jsi::Function> callback) {
4232
if (!runner || !runner->is_loaded()) {
4333
throw std::runtime_error("Runner is not loaded");
@@ -50,7 +40,8 @@ void LLM::generate(std::string input, std::shared_ptr<jsi::Function> callback) {
5040
});
5141
};
5242

53-
auto error = runner->generate(input, nativeCallback, {}, false);
43+
auto config = llm::GenerationConfig{.echo = false, .warming = false};
44+
auto error = runner->generate(input, config, nativeCallback, {});
5445
if (error != executorch::runtime::Error::Ok) {
5546
throw std::runtime_error("Failed to generate text, error code: " +
5647
std::to_string(static_cast<int>(error)));

packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,6 @@ class LLM : public BaseModel {
3030

3131
private:
3232
std::unique_ptr<example::Runner> runner;
33-
34-
// A typical input for parallel processing in exported LLM model consists of 2
35-
// tensors of shapes [1, N] and [1], where N is the number of tokens. However,
36-
// some exported models require inputs of shapes [1, N] and [N], which needs
37-
// to be marked before using LLM runner.
38-
bool extended_input_mode_ = false;
3933
};
4034
} // namespace models::llm
4135

packages/react-native-executorch/common/rnexecutorch/models/voice_activity_detection/Utils.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ size_t getNonSpeechClassProbabilites(const executorch::aten::Tensor &tensor,
55
size_t numClass, size_t size,
66
std::vector<float> &resultVector,
77
size_t startIdx) {
8-
const auto* rawData = tensor.const_data_ptr<float>();
8+
const auto *rawData = tensor.const_data_ptr<float>();
99
for (size_t i = 0; i < size; i++) {
1010
resultVector[startIdx + i] = rawData[numClass * i];
1111
}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include "arange_util.h"

// Include what we use directly instead of relying on transitive includes:
#include <cinttypes> // PRId64 (used in the ET_CHECK_MSG format string)
#include <cmath>     // std::ceil

namespace torch::executor::native {

// Fills `out` with the arithmetic sequence start, start + step, ... for
// `numel` elements, dispatching on the output tensor's runtime dtype.
// `op_name` labels the dispatch site in diagnostics.
#define ET_ARANGE_IMPL(ctx, start, numel, step, out, op_name)                  \
  ET_SWITCH_REALHBF16_TYPES(out.scalar_type(), ctx, op_name, CTYPE, [&]() {    \
    auto out_data = out.mutable_data_ptr<CTYPE>();                             \
    for (executorch::aten::SizesType i = 0; i < numel; ++i) {                  \
      out_data[i] = static_cast<CTYPE>(start + i * step);                      \
    }                                                                          \
  })

// Returns the number of elements produced by arange(start, end, step),
// i.e. ceil((end - start) / step). Aborts (ET_CHECK_MSG) if the computed
// count is negative, which happens when step points away from end.
executorch::aten::SizesType compute_arange_out_size(double start, double end,
                                                    double step) {
  executorch::aten::SizesType numel =
      static_cast<executorch::aten::SizesType>(std::ceil((end - start) / step));

  ET_CHECK_MSG(numel >= 0,
               "numel should be non-negative, but got (%" PRId64
               "). start (%f), end (%f), step (%f)",
               static_cast<int64_t>(numel), start, end, step);
  return numel;
}

// Writes the sequence start, start + step, ... into `out`, whose element
// count is derived from the (start, end, step) triple.
void arange_out_impl(KernelRuntimeContext &ctx, double start, double end,
                     double step, Tensor &out) {
  (void)ctx;
  executorch::aten::SizesType numel = compute_arange_out_size(start, end, step);
  ET_ARANGE_IMPL(ctx, start, numel, step, out, "arange.start_out");
}

// Convenience overload equivalent to arange(0, end, 1).
void arange_out_impl(KernelRuntimeContext &ctx, double end, Tensor &out) {
  (void)ctx;
  ET_ARANGE_IMPL(ctx, 0.0, end, 1.0, out, "arange.out");
}

} // namespace torch::executor::native
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include "kernel_includes.h"

namespace torch::executor::native {

// Number of elements an arange(start, end, step) result holds.
executorch::aten::SizesType compute_arange_out_size(double start, double end,
                                                    double step);

// Convenience overload: element count of arange(end), i.e. [0, end) step 1.
inline executorch::aten::SizesType compute_arange_out_size(double end) {
  return compute_arange_out_size(/*start=*/0.0, end, /*step=*/1.0);
}

// Writes the sequence start, start + step, ... into `out`.
void arange_out_impl(KernelRuntimeContext &ctx, double start, double end,
                     double step, Tensor &out);

// Writes the sequence 0, 1, ... into `out`.
void arange_out_impl(KernelRuntimeContext &ctx, double end, Tensor &out);

// Context-free wrapper: forwards to the four-argument overload with a
// locally constructed runtime context.
inline void arange_out_impl(double start, double end, double step,
                            Tensor &out) {
  KernelRuntimeContext context;
  arange_out_impl(context, start, end, step, out);
}

// Context-free wrapper for arange(end); forwards to the start/end/step
// overload so both paths share one implementation.
inline void arange_out_impl(double end, Tensor &out) {
  KernelRuntimeContext context;
  arange_out_impl(context, 0.0, end, 1.0, out);
}

} // namespace torch::executor::native
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */
#pragma once
// Shared string constants for the LLM runtime. Defined `inline constexpr`
// so the header can be included from multiple TUs without ODR violations.
namespace executorch::extension::llm {

// Runtime metadata key constants. These name the metadata methods an
// exported LLM program is queried with at load time (tokenizer ids,
// sequence/context limits, KV-cache configuration).
inline constexpr auto kEnableDynamicShape = "enable_dynamic_shape";
inline constexpr auto kBosId = "get_bos_id";
inline constexpr auto kEosIds = "get_eos_ids";
inline constexpr auto kMaxSeqLen = "get_max_seq_len";
inline constexpr auto kMaxContextLen = "get_max_context_len";
inline constexpr auto kVocabSize = "get_vocab_size";
inline constexpr auto kUseKVCache = "use_kv_cache";
inline constexpr auto kUseSDPAWithKVCache = "use_sdpa_with_kv_cache";

// Multimodal method name conventions: the method names under which a
// multimodal model's sub-components are exported in the program.
inline constexpr auto kVisionEncoderMethod = "vision_encoder";
inline constexpr auto kAudioEncoderMethod = "audio_encoder";
inline constexpr auto kTokenEmbeddingMethod = "token_embedding";
inline constexpr auto kTextModelMethod = "text_decoder";

} // namespace executorch::extension::llm

0 commit comments

Comments
 (0)