Skip to content

Commit cd2f5d0

Browse files
authored
feat: bump ExecuTorch runtime to v1.0.0
## Description <!-- Provide a concise and descriptive summary of the changes implemented in this PR. --> ### Introduces a breaking change? - [ ] Yes - [x] No ### Type of change - [ ] Bug fix (change which fixes an issue) - [x] New feature (change which adds functionality) - [ ] Documentation update (improves or adds clarity to existing documentation) - [ ] Other (chores, tests, code style improvements etc.) ### Tested on - [x] iOS - [x] Android ### Testing instructions <!-- Provide step-by-step instructions on how to test your changes. Include setup details if necessary. --> ### Screenshots <!-- Add screenshots here, if applicable --> ### Related issues <!-- Link related issues here using #issue-number --> ### Checklist - [x] I have performed a self-review of my code - [x] I have commented my code, particularly in hard-to-understand areas - [ ] I have updated the documentation accordingly - [ ] My changes generate no new warnings ### Additional notes <!-- Include any additional information, assumptions, or context that reviewers might need to understand this PR. -->
1 parent 83130d3 commit cd2f5d0

File tree

124 files changed

+9424
-6053
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the searchbox below for content that may be hidden.

124 files changed

+9424
-6053
lines changed
8.58 KB
Binary file not shown.

packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <rnexecutorch/threads/GlobalThreadPool.h>
66

77
namespace rnexecutorch::models::llm {
8+
namespace llm = ::executorch::extension::llm;
89
namespace fs = std::filesystem;
910
using namespace facebook;
1011
using executorch::extension::TensorPtr;
@@ -14,8 +15,8 @@ using executorch::runtime::Error;
1415
LLM::LLM(const std::string &modelSource, const std::string &tokenizerSource,
1516
std::shared_ptr<react::CallInvoker> callInvoker)
1617
: BaseModel(modelSource, callInvoker, Module::LoadMode::File),
17-
runner(std::make_unique<example::Runner>(module_.get(), tokenizerSource,
18-
false)) {
18+
runner(
19+
std::make_unique<example::Runner>(module_.get(), tokenizerSource)) {
1920
auto loadResult = runner->load();
2021
if (loadResult != Error::Ok) {
2122
throw std::runtime_error("Failed to load LLM runner, error code: " +
@@ -24,20 +25,9 @@ LLM::LLM(const std::string &modelSource, const std::string &tokenizerSource,
2425

2526
memorySizeLowerBound = fs::file_size(fs::path(modelSource)) +
2627
fs::file_size(fs::path(tokenizerSource));
27-
28-
// Determine the input mode
29-
auto inputShapes = getAllInputShapes("forward");
30-
auto &tokensTensorShape = inputShapes[0];
31-
auto &positionsTensorShape = inputShapes[1];
32-
if (tokensTensorShape.size() != 2 || positionsTensorShape.size() != 1) {
33-
throw std::runtime_error("Unsupported LLM input format");
34-
}
35-
if (positionsTensorShape[0] != 1 &&
36-
tokensTensorShape[1] == positionsTensorShape[0]) {
37-
runner->set_extended_input_mode(true);
38-
}
3928
}
4029

30+
// TODO: add a way to manipulate the generation config with params
4131
void LLM::generate(std::string input, std::shared_ptr<jsi::Function> callback) {
4232
if (!runner || !runner->is_loaded()) {
4333
throw std::runtime_error("Runner is not loaded");
@@ -50,7 +40,8 @@ void LLM::generate(std::string input, std::shared_ptr<jsi::Function> callback) {
5040
});
5141
};
5242

53-
auto error = runner->generate(input, nativeCallback, {}, false);
43+
auto config = llm::GenerationConfig{.echo = false, .warming = false};
44+
auto error = runner->generate(input, config, nativeCallback, {});
5445
if (error != executorch::runtime::Error::Ok) {
5546
throw std::runtime_error("Failed to generate text, error code: " +
5647
std::to_string(static_cast<int>(error)));

packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,6 @@ class LLM : public BaseModel {
3030

3131
private:
3232
std::unique_ptr<example::Runner> runner;
33-
34-
// A typical input for parallel processing in exported LLM model consists of 2
35-
// tensors of shapes [1, N] and [1], where N is the number of tokens. However,
36-
// some exported models require inputs of shapes [1, N] and [N], which needs
37-
// to be marked before using LLM runner.
38-
bool extended_input_mode_ = false;
3933
};
4034
} // namespace models::llm
4135

packages/react-native-executorch/common/rnexecutorch/models/voice_activity_detection/Utils.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ size_t getNonSpeechClassProbabilites(const executorch::aten::Tensor &tensor,
55
size_t numClass, size_t size,
66
std::vector<float> &resultVector,
77
size_t startIdx) {
8-
const auto* rawData = tensor.const_data_ptr<float>();
8+
const auto *rawData = tensor.const_data_ptr<float>();
99
for (size_t i = 0; i < size; i++) {
1010
resultVector[startIdx + i] = rawData[numClass * i];
1111
}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include "arange_util.h"

// Include what we use directly instead of relying on transitive includes:
#include <cinttypes> // PRId64 (used in the ET_CHECK_MSG format string)
#include <cmath>     // std::ceil

namespace torch::executor::native {

// Fills `out` with the arithmetic sequence start, start + step, ... for
// `numel` elements, dispatching on the output tensor's runtime dtype.
// `op_name` labels the dispatch site in diagnostics.
#define ET_ARANGE_IMPL(ctx, start, numel, step, out, op_name)                  \
  ET_SWITCH_REALHBF16_TYPES(out.scalar_type(), ctx, op_name, CTYPE, [&]() {    \
    auto out_data = out.mutable_data_ptr<CTYPE>();                             \
    for (executorch::aten::SizesType i = 0; i < numel; ++i) {                  \
      out_data[i] = static_cast<CTYPE>(start + i * step);                      \
    }                                                                          \
  })

// Returns the number of elements produced by arange(start, end, step),
// i.e. ceil((end - start) / step). Aborts (ET_CHECK_MSG) if the computed
// count is negative, which happens when step points away from end.
executorch::aten::SizesType compute_arange_out_size(double start, double end,
                                                    double step) {
  executorch::aten::SizesType numel =
      static_cast<executorch::aten::SizesType>(std::ceil((end - start) / step));

  ET_CHECK_MSG(numel >= 0,
               "numel should be non-negative, but got (%" PRId64
               "). start (%f), end (%f), step (%f)",
               static_cast<int64_t>(numel), start, end, step);
  return numel;
}

// Writes the sequence start, start + step, ... into `out`, whose element
// count is derived from the (start, end, step) triple.
void arange_out_impl(KernelRuntimeContext &ctx, double start, double end,
                     double step, Tensor &out) {
  (void)ctx;
  executorch::aten::SizesType numel = compute_arange_out_size(start, end, step);
  ET_ARANGE_IMPL(ctx, start, numel, step, out, "arange.start_out");
}

// Convenience overload equivalent to arange(0, end, 1).
void arange_out_impl(KernelRuntimeContext &ctx, double end, Tensor &out) {
  (void)ctx;
  ET_ARANGE_IMPL(ctx, 0.0, end, 1.0, out, "arange.out");
}

} // namespace torch::executor::native
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include "kernel_includes.h"

namespace torch::executor::native {

// Number of elements an arange(start, end, step) result holds.
executorch::aten::SizesType compute_arange_out_size(double start, double end,
                                                    double step);

// Convenience overload: element count of arange(end), i.e. [0, end) step 1.
inline executorch::aten::SizesType compute_arange_out_size(double end) {
  return compute_arange_out_size(/*start=*/0.0, end, /*step=*/1.0);
}

// Writes the sequence start, start + step, ... into `out`.
void arange_out_impl(KernelRuntimeContext &ctx, double start, double end,
                     double step, Tensor &out);

// Writes the sequence 0, 1, ... into `out`.
void arange_out_impl(KernelRuntimeContext &ctx, double end, Tensor &out);

// Context-free wrapper: forwards to the four-argument overload with a
// locally constructed runtime context.
inline void arange_out_impl(double start, double end, double step,
                            Tensor &out) {
  KernelRuntimeContext context;
  arange_out_impl(context, start, end, step, out);
}

// Context-free wrapper for arange(end); forwards to the start/end/step
// overload so both paths share one implementation.
inline void arange_out_impl(double end, Tensor &out) {
  KernelRuntimeContext context;
  arange_out_impl(context, 0.0, end, 1.0, out);
}

} // namespace torch::executor::native
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */
#pragma once
// Shared string constants for the LLM runtime. Defined `inline constexpr`
// so the header can be included from multiple TUs without ODR violations.
namespace executorch::extension::llm {

// Runtime metadata key constants. These name the metadata methods an
// exported LLM program is queried with at load time (tokenizer ids,
// sequence/context limits, KV-cache configuration).
inline constexpr auto kEnableDynamicShape = "enable_dynamic_shape";
inline constexpr auto kBosId = "get_bos_id";
inline constexpr auto kEosIds = "get_eos_ids";
inline constexpr auto kMaxSeqLen = "get_max_seq_len";
inline constexpr auto kMaxContextLen = "get_max_context_len";
inline constexpr auto kVocabSize = "get_vocab_size";
inline constexpr auto kUseKVCache = "use_kv_cache";
inline constexpr auto kUseSDPAWithKVCache = "use_sdpa_with_kv_cache";

// Multimodal method name conventions: the method names under which a
// multimodal model's sub-components are exported in the program.
inline constexpr auto kVisionEncoderMethod = "vision_encoder";
inline constexpr auto kAudioEncoderMethod = "audio_encoder";
inline constexpr auto kTokenEmbeddingMethod = "token_embedding";
inline constexpr auto kTextModelMethod = "text_decoder";

} // namespace executorch::extension::llm

0 commit comments

Comments
 (0)