Commit a603e48

support int32 token in runner and profiling
1 parent 3f188ff commit a603e48

File tree: 4 files changed (+39, −5 lines)

backends/qualcomm/runtime/QnnExecuTorchBackend.cpp

Lines changed: 24 additions & 1 deletion
@@ -11,6 +11,9 @@
 #include <executorch/backends/qualcomm/runtime/QnnExecuTorchBackend.h>
 #include <executorch/backends/qualcomm/runtime/QnnManager.h>
 #include <executorch/backends/qualcomm/schema_generated.h>
+#include <chrono>
+
+// #include <fstream>
 namespace executorch {
 namespace backends {
 namespace qnn {
@@ -26,6 +29,7 @@ using executorch::runtime::MemoryAllocator;
 using executorch::runtime::Result;
 // ========== Public method implementations =========================
 constexpr const char* QNN_COMPILE_SPEC = "qnn_compile_spec";
+// static int hi = 0;
 Result<DelegateHandle*> QnnExecuTorchBackend::init(
     BackendInitContext& context,
     FreeableBuffer* processed,
@@ -36,6 +40,11 @@ Result<DelegateHandle*> QnnExecuTorchBackend::init(

   qnn_context_blob.buffer = const_cast<void*>(processed->data());
   qnn_context_blob.nbytes = processed->size();
+  // std::string path_ = "model_" + std::to_string(hi) + ".bin";
+  // std::ofstream fout(path_, std::ios::binary);
+  // fout.write(static_cast<const char*>(processed->data()), static_cast<int64_t>(processed->size()));
+  // fout.flush();
+  // hi++;

   // convert CompileSpec to qnn ExecuTorch option
   for (auto& compile_spec : compile_specs) {
@@ -180,11 +189,12 @@ Result<DelegateHandle*> QnnExecuTorchBackend::init(
   }
   return qnn_manager;
 }
-
+// static int qq = 0;
 Error QnnExecuTorchBackend::execute(
     BackendExecutionContext& context,
     DelegateHandle* handle,
     EValue** args) const {
+  auto begin = std::chrono::high_resolution_clock::now();
   QnnManager* qnn_manager = static_cast<QnnManager*>(handle);

   std::vector<std::shared_ptr<TensorWrapper>> input_tensors =
@@ -202,6 +212,14 @@ Error QnnExecuTorchBackend::execute(
       // update data ptr only should be fine
       input_tensors[i]->FillDataBuffer(
           args[i]->toTensor().const_data_ptr(), false /* copy_data */);
+      // if (qq < input_tensors.size()) {
+      //   std::string path_ = "qinput_" + std::to_string(qq) + ".raw";
+      //   std::ofstream fout(path_, std::ios::binary);
+      //   fout.write(static_cast<const char*>(args[i]->toTensor().const_data_ptr()), input_tensors[i]->GetBytes());
+      //   fout.flush();
+      //   qq++;
+      // }
+
     }
     input_tensor_structs.push_back(input_tensors[i]->CloneTensorStruct());
   }
@@ -232,7 +250,12 @@ Error QnnExecuTorchBackend::execute(
       qnn_manager->ProfileExecuteData(context.event_tracer()) == Error::Ok,
       Internal,
       "Fail to profile graph");
+  auto end = std::chrono::high_resolution_clock::now();

+  auto elapsed = std::chrono::duration_cast<std::chrono::microseconds>(end -
+      begin);
+  QNN_EXECUTORCH_LOG_INFO(
+      "QNN Graph Execute Time in QnnExecuTorchBackend: %ld us", elapsed.count());
   return Error::Ok;
 }
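
The profiling added here brackets the delegated graph execution with the usual std::chrono pattern. A minimal, self-contained sketch of that pattern, assuming only the C++ standard library (the measured work and the print call are placeholders, not ExecuTorch APIs):

    #include <chrono>
    #include <cstdio>

    long long time_block_us() {
      auto begin = std::chrono::high_resolution_clock::now();
      // ... work being measured, e.g. the delegated graph execution ...
      auto end = std::chrono::high_resolution_clock::now();
      // duration_cast truncates to whole microseconds.
      auto elapsed =
          std::chrono::duration_cast<std::chrono::microseconds>(end - begin);
      std::printf("execute time: %lld us\n",
                  static_cast<long long>(elapsed.count()));
      return static_cast<long long>(elapsed.count());
    }

The same begin/end pattern is applied to Method::execute_instruction in runtime/executor/method.cpp further down.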

extension/llm/runner/text_prefiller.cpp

Lines changed: 2 additions & 2 deletions
@@ -41,7 +41,7 @@ ::executorch::runtime::Result<uint64_t> TextPrefiller::prefill(
   auto tokens = from_blob(
       prompt_tokens.data(),
       {1, num_prompt_tokens},
-      exec_aten::ScalarType::Long);
+      exec_aten::ScalarType::Int);

   auto start_pos_tensor =
       from_blob(&start_pos, {1}, exec_aten::ScalarType::Long);
@@ -60,7 +60,7 @@ ::executorch::runtime::Result<uint64_t> TextPrefiller::prefill(
   cur_token = prompt_tokens[0];

   // initialize tensor wrappers
-  auto tokens = from_blob(&cur_token, {1, 1}, exec_aten::ScalarType::Long);
+  auto tokens = from_blob(&cur_token, {1, 1}, exec_aten::ScalarType::Int);

   auto start_pos_tensor =
       from_blob(&start_pos, {1}, exec_aten::ScalarType::Long);
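
Switching the token tensor dtype from exec_aten::ScalarType::Long to Int means from_blob now interprets the backing buffer as 4-byte elements instead of 8-byte ones, so the storage behind prompt_tokens and cur_token presumably has to hold 32-bit values for the shapes and byte counts to line up. A minimal sketch of that size relationship, standard C++ only (the vector names and token values are illustrative, not taken from this commit):

    #include <cstdint>
    #include <vector>

    int main() {
      // Buffers that would back an Int (int32) vs. a Long (int64) token tensor.
      std::vector<int32_t> tokens_int32 = {1, 15043, 3186};
      std::vector<int64_t> tokens_int64 = {1, 15043, 3186};

      // ScalarType::Int corresponds to 4-byte elements and ScalarType::Long to
      // 8-byte elements, so the same token count occupies half the bytes as int32.
      static_assert(sizeof(int32_t) == 4, "Int elements are 4 bytes");
      static_assert(sizeof(int64_t) == 8, "Long elements are 8 bytes");
      return tokens_int32.size() == tokens_int64.size() ? 0 : 1;
    }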

extension/llm/runner/text_token_generator.h

Lines changed: 1 addition & 1 deletion
@@ -71,7 +71,7 @@ class ET_EXPERIMENTAL TextTokenGenerator {

   // initialize tensor wrappers
   auto tokens_managed = from_blob(
-      token_data.data(), token_shape, executorch::aten::ScalarType::Long);
+      token_data.data(), token_shape, executorch::aten::ScalarType::Int);
   auto start_pos_managed =
       from_blob(&pos, {1}, executorch::aten::ScalarType::Long);

runtime/executor/method.cpp

Lines changed: 12 additions & 1 deletion
@@ -26,7 +26,8 @@
 #include <executorch/runtime/platform/log.h>
 #include <executorch/runtime/platform/profiler.h>
 #include <executorch/schema/program_generated.h>
-
+#include <chrono>
+#include <iostream>
 namespace executorch {
 namespace runtime {

@@ -1004,6 +1005,7 @@ ET_NODISCARD Error Method::get_inputs(EValue* input_evalues, size_t length) {
 }

 Error Method::execute_instruction() {
+  auto begin = std::chrono::high_resolution_clock::now();
   auto& chain = chains_[step_state_.chain_idx];
   auto instructions = chain.s_chain_->instructions();

@@ -1030,6 +1032,9 @@ Error Method::execute_instruction() {
       chain.kernels_[step_state_.instr_idx](context, args.data());
       // We reset the temp_allocator after the switch statement
       err = context.failure_state();
+      auto op_index = instruction->instr_args_as_KernelCall()->op_index();
+      auto op = serialization_plan_->operators()->Get(op_index);
+      std::cout << "run op" << op->name()->c_str() << std::endl;
       if (err != Error::Ok) {
         // We know that instr_args_as_KernelCall is non-null because it was
         // checked at init time.
@@ -1151,6 +1156,12 @@ Error Method::execute_instruction() {
   if (err == Error::Ok) {
     step_state_.instr_idx = next_instr_idx;
   }
+  auto end = std::chrono::high_resolution_clock::now();
+
+  auto elapsed = std::chrono::duration_cast<std::chrono::microseconds>(end -
+      begin);
+  std::cout << "instruction->instr_args_type()" << static_cast<int>(instruction->instr_args_type()) << std::endl;
+  std::cout << "delegates_[delegate_idx].Execute Time:" << elapsed.count() << std::endl;
   return err;
 }
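
The per-instruction timing above emits one std::cout line per executed instruction. If the goal were a per-operator summary instead, the same measurements could be accumulated by op name; a hedged sketch of that idea, standard C++ only (record_op_time, dump_op_times, and g_op_time_us are illustrative names, not part of ExecuTorch or this commit):

    #include <chrono>
    #include <cstdio>
    #include <map>
    #include <string>

    // Accumulated microseconds per operator name (illustrative global).
    static std::map<std::string, long long> g_op_time_us;

    static void record_op_time(
        const std::string& op_name,
        std::chrono::high_resolution_clock::time_point begin,
        std::chrono::high_resolution_clock::time_point end) {
      auto us = std::chrono::duration_cast<std::chrono::microseconds>(end - begin);
      g_op_time_us[op_name] += us.count();
    }

    static void dump_op_times() {
      for (const auto& entry : g_op_time_us) {
        std::printf("%s: %lld us\n", entry.first.c_str(), entry.second);
      }
    }

    int main() {
      auto t0 = std::chrono::high_resolution_clock::now();
      auto t1 = std::chrono::high_resolution_clock::now();
      record_op_time("aten::add.out", t0, t1);  // illustrative op name
      dump_op_times();
      return 0;
    }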
