Skip to content

Commit c5d744e

Browse files
authored
Merge branch 'main' into export-D79850580
2 parents 759a3c5 + 6d56713 commit c5d744e

File tree

8 files changed

+268
-39
lines changed

8 files changed

+268
-39
lines changed

examples/models/llama/evaluate/eager_eval.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import torch
1111

1212
from lm_eval.models.huggingface import HFLM as eval_wrapper
13+
from pytorch_tokenizers.hf_tokenizer import HuggingFaceTokenizer
1314
from pytorch_tokenizers.llama2c import Llama2cTokenizer as SentencePieceTokenizer
1415
from pytorch_tokenizers.tiktoken import TiktokenTokenizer as Tiktoken
1516

@@ -24,7 +25,7 @@ class EagerEvalWrapper(eval_wrapper):
2425
def __init__(
2526
self,
2627
model: nn.Module,
27-
tokenizer: Union[SentencePieceTokenizer, Tiktoken],
28+
tokenizer: Union[SentencePieceTokenizer, Tiktoken, HuggingFaceTokenizer],
2829
max_seq_length: Optional[int] = None,
2930
use_kv_cache: bool = False,
3031
):

examples/qualcomm/oss_scripts/llama/TARGETS

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,30 @@ python_library(
1515
],
1616
)
1717

18+
python_library(
19+
name = "decoder_utils",
20+
srcs = [
21+
"decoder_utils.py",
22+
],
23+
deps = [
24+
"//caffe2:torch",
25+
"//executorch/examples/models/llama:eval_library",
26+
],
27+
)
28+
29+
python_library(
30+
name = "decoder_constants",
31+
srcs = [
32+
"decoder_constants.py",
33+
],
34+
)
35+
1836
python_library(
1937
name = "llama_lib",
2038
srcs = ["llama.py"],
2139
deps = [
40+
":decoder_constants",
41+
":decoder_utils",
2242
"//executorch/examples/models/llama:source_transformation",
2343
"//caffe2:torch",
2444
"//executorch/backends/qualcomm/partition:partition",

examples/qualcomm/oss_scripts/llama/decoder_utils.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,15 @@ def __init__(
4444
tokenizer: Union[
4545
SentencePieceTokenizer, TiktokenTokenizer, HuggingFaceTokenizer
4646
],
47-
max_seq_length: Optional[int],
47+
max_seq_length: int,
4848
ar_len: int,
4949
use_kv_cache: bool,
5050
get_example_inputs: Callable,
5151
kv_updater: Callable,
5252
use_i64_token: bool,
5353
):
5454
# n seq len = n-1 cache len, so we len(inps) = n-1 during _model_call
55+
assert max_seq_length is not None, "max_seq_length must be provided"
5556
super().__init__(
5657
model=model, tokenizer=tokenizer, max_seq_length=max_seq_length - 1
5758
)
@@ -119,8 +120,10 @@ def __init__(
119120
for method in program.execution_plan:
120121
# Don't use tokenizer.n_words, the numbers are off once calling get_tokenizer()
121122
if method.name == "get_vocab_size":
123+
# pyre-ignore
122124
self.output_vocab_size = method.values[0].val.int_val
123125
if method.name == "get_max_seq_len":
126+
# pyre-ignore
124127
pte_max_seq_len = method.values[0].val.int_val
125128
assert self.output_vocab_size is not None, "Couldn't find the vocab size"
126129
assert pte_max_seq_len is not None, "Couldn't find the max_seq_len from pte"
@@ -156,6 +159,7 @@ def __init__(
156159
)
157160
self.adb.push(inputs=[], input_list="", files=[self.runtime_tokenizer_path])
158161
# n seq len = n-1 cache len, so we len(inps) = n-1 during _model_call
162+
# pyre-ignore
159163
super().__init__(None, tokenizer, max_seq_length - 1)
160164

161165
def _model_call(self, inps):
@@ -278,6 +282,7 @@ def kv_inference(
278282
else:
279283
raise RuntimeError("Unknown tokenizer")
280284
else:
285+
# pyre-ignore
281286
token_list = prompt.flatten().tolist()
282287
pos = len(token_list) if len(token_list) < ar_len else ar_len
283288
dtype = torch.int64 if use_i64_token else torch.int32
@@ -359,6 +364,7 @@ def prefill_inference(
359364
else:
360365
raise RuntimeError("Unknown tokenizer")
361366
else:
367+
# pyre-ignore
362368
token_list = prompt.flatten().tolist()
363369

364370
pos = len(token_list)
@@ -405,7 +411,7 @@ def graph_module_inference(
405411
max_seq_len=512,
406412
kv_updater=smart_mask_updater,
407413
use_i64_token=False,
408-
event_name: str = None,
414+
event_name: Optional[str] = None,
409415
):
410416
if args.tasks is None:
411417
if use_kv_cache:

extension/module/module.cpp

Lines changed: 7 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,6 @@ runtime::Error Module::load_method(
210210
method_holder.memory_manager.get(),
211211
event_tracer ? event_tracer : this->event_tracer(),
212212
data_map_.get()));
213-
method_holder.inputs.resize(method_holder.method->inputs_size());
214213
methods_.emplace(method_name, std::move(method_holder));
215214
}
216215
return runtime::Error::Ok;
@@ -233,28 +232,10 @@ runtime::Result<std::vector<runtime::EValue>> Module::execute(
233232
const std::vector<runtime::EValue>& input_values) {
234233
ET_CHECK_OK_OR_RETURN_ERROR(load_method(method_name));
235234
auto& method = methods_.at(method_name).method;
236-
auto& inputs = methods_.at(method_name).inputs;
237-
238-
ET_CHECK_OR_RETURN_ERROR(
239-
input_values.size() <= inputs.size(),
240-
InvalidArgument,
241-
"input size: %zu does not match method input size: %zu",
242-
input_values.size(),
243-
inputs.size());
244-
for (size_t i = 0; i < input_values.size(); ++i) {
245-
if (!input_values[i].isNone()) {
246-
inputs[i] = input_values[i];
247-
}
235+
for (auto index = 0; index < input_values.size(); ++index) {
236+
ET_CHECK_OK_OR_RETURN_ERROR(method->set_input(input_values[index], index));
248237
}
249-
for (size_t i = 0; i < inputs.size(); ++i) {
250-
ET_CHECK_OR_RETURN_ERROR(
251-
!inputs[i].isNone(), InvalidArgument, "input %zu is none", i);
252-
}
253-
ET_CHECK_OK_OR_RETURN_ERROR(
254-
method->set_inputs(executorch::aten::ArrayRef<runtime::EValue>(
255-
inputs.data(), inputs.size())));
256238
ET_CHECK_OK_OR_RETURN_ERROR(method->execute());
257-
258239
const auto outputs_size = method->outputs_size();
259240
std::vector<runtime::EValue> outputs(outputs_size);
260241
ET_CHECK_OK_OR_RETURN_ERROR(
@@ -268,23 +249,17 @@ runtime::Error Module::set_input(
268249
const runtime::EValue& input_value,
269250
size_t input_index) {
270251
ET_CHECK_OK_OR_RETURN_ERROR(load_method(method_name));
271-
methods_.at(method_name).inputs.at(input_index) = input_value;
272-
return runtime::Error::Ok;
252+
auto& method = methods_.at(method_name).method;
253+
return method->set_input(input_value, input_index);
273254
}
274255

275256
runtime::Error Module::set_inputs(
276257
const std::string& method_name,
277258
const std::vector<runtime::EValue>& input_values) {
278259
ET_CHECK_OK_OR_RETURN_ERROR(load_method(method_name));
279-
auto& inputs = methods_.at(method_name).inputs;
280-
ET_CHECK_OR_RETURN_ERROR(
281-
inputs.size() == input_values.size(),
282-
InvalidArgument,
283-
"input size: %zu does not match method input size: %zu",
284-
input_values.size(),
285-
inputs.size());
286-
inputs = input_values;
287-
return runtime::Error::Ok;
260+
auto& method = methods_.at(method_name).method;
261+
return method->set_inputs(executorch::aten::ArrayRef<runtime::EValue>(
262+
input_values.data(), input_values.size()));
288263
}
289264

290265
runtime::Error Module::set_output(

extension/module/module.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -522,7 +522,6 @@ class Module {
522522
std::unique_ptr<runtime::HierarchicalAllocator> planned_memory;
523523
std::unique_ptr<runtime::MemoryManager> memory_manager;
524524
std::unique_ptr<Method> method;
525-
std::vector<runtime::EValue> inputs;
526525
};
527526

528527
std::string file_path_;

extension/module/test/module_test.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ TEST_F(ModuleTest, TestForward) {
267267
EXPECT_TENSOR_CLOSE(result->at(0).toTensor(), *expected.get());
268268

269269
auto tensor2 = make_tensor_ptr({2, 2}, {2.f, 3.f, 4.f, 5.f});
270-
const auto result2 = module->forward({tensor2, tensor2});
270+
const auto result2 = module->forward({tensor2, tensor2, 1.0});
271271
EXPECT_EQ(result2.error(), Error::Ok);
272272

273273
const auto expected2 = make_tensor_ptr({2, 2}, {4.f, 6.f, 8.f, 10.f});

extension/pybindings/pybindings.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,7 @@ class Module final {
358358

359359
MallocMemoryAllocator runtime_allocator_;
360360

361-
MemoryAllocator temp_allocator_{MemoryAllocator(0, nullptr)};
361+
MallocMemoryAllocator temp_allocator_{};
362362

363363
std::vector<std::vector<uint8_t>> non_const_buffers_;
364364

@@ -1061,7 +1061,7 @@ class ProgramMemory {
10611061

10621062
MallocMemoryAllocator runtime_allocator_;
10631063

1064-
MemoryAllocator temp_allocator_{MemoryAllocator(0, nullptr)};
1064+
MallocMemoryAllocator temp_allocator_{};
10651065

10661066
std::vector<std::vector<uint8_t>> non_const_buffers_;
10671067

0 commit comments

Comments
 (0)