
Commit b922f24

add native script
1 parent 49e6d98 commit b922f24

File tree

9 files changed: +750 −85 lines changed

backends/qualcomm/builders/op_slice_copy.py

Lines changed: 1 addition & 1 deletion
@@ -56,7 +56,7 @@ def define_node(
         if start < 0:
             start = start % input_tensor.shape[dim]

-        if len(node.args) > 3:
+        if len(node.args) > 3 and node.args[3] is not None:
             end = min(cast(int, node.args[3]), input_tensor.shape[dim])
             if end < 0:
                 end = end % input_tensor.shape[dim]
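
Note: the extra guard matters because `aten.slice_copy` declares `end` as an optional argument, so an exported graph can carry an explicit `None` in `args[3]`, which the old `cast`/`min` path could not handle. A minimal sketch of the resolution logic this change implies (our paraphrase with a hypothetical `resolve_end` helper, not the backend's exact code; the fallback to the full dim size is assumed from the surrounding context):

def resolve_end(args, shape, dim):
    # Assumed default: a missing or explicit-None end means "slice to the
    # end of this dim". The pre-fix code would pass None into min() here.
    end = shape[dim]
    if len(args) > 3 and args[3] is not None:
        end = min(int(args[3]), shape[dim])
        if end < 0:
            end = end % shape[dim]
    return end

assert resolve_end((None, 0, 1), (8,), 0) == 8        # end omitted
assert resolve_end((None, 0, 1, None), (8,), 0) == 8  # end is None
assert resolve_end((None, 0, 1, -2), (8,), 0) == 6    # negative end wraps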

backends/qualcomm/qnn_preprocess.py

Lines changed: 28 additions & 20 deletions
@@ -78,10 +78,7 @@ def _build_op_wrappers(
                 )
                 assert node.target == context_loader_target, err_msg
                 # if graph has context binary loader node, return directly
-                return PreprocessResult(
-                    processed_bytes=node.meta[OpContextLoader.meta_ctx_bin],
-                    debug_handle_map={},
-                )
+                return node.meta[OpContextLoader.meta_ctx_bin]
             except:
                 raise RuntimeError(err_msg)

@@ -161,30 +158,41 @@ def preprocess_multimethod(
                 generate_qnn_executorch_option(compile_spec)
             )
             qnn_manager.Init()
-            py_op_wrapper_list = []
+            py_op_wrapper_list, ctx_binary_list = [], []
             for j, programs in enumerate(edge_programs.values()):
                 logger.info(f"Processing Method({j}): ({i+1}/{num_sub_graphs})")
                 py_op_wrappers = QnnBackend._build_op_wrappers(
                     programs[i],
                     qnn_manager.IsTensorDump(),
                     option.op_package_options.op_package_infos,
                 )
-                py_op_wrapper_list.append(
-                    [py_op_wrapper.GetOpWrapper() for py_op_wrapper in py_op_wrappers]
-                )
+                if isinstance(py_op_wrappers, bytes):
+                    ctx_binary_list.append(py_op_wrappers)
+                else:
+                    py_op_wrapper_list.append(
+                        [py_op_wrapper.GetOpWrapper() for py_op_wrapper in py_op_wrappers]
+                    )

-            qnn_context_binary = qnn_manager.Compile(graph_name, py_op_wrapper_list)
-            assert (
-                len(qnn_context_binary) != 0
-            ), "Failed to generate Qnn context binary."
-            qnn_manager.Destroy()
-            # methods should share the same context binary for current partition
-            for key in edge_programs.keys():
-                all_processed_results[key].append(
-                    PreprocessResult(
-                        processed_bytes=bytes(qnn_context_binary),
-                        debug_handle_map={},
+            if len(py_op_wrapper_list) == len(edge_programs.values()):
+                qnn_context_binary = qnn_manager.Compile(graph_name, py_op_wrapper_list)
+                assert (
+                    len(qnn_context_binary) != 0
+                ), "Failed to generate Qnn context binary."
+                qnn_manager.Destroy()
+                # methods should share the same context binary for current partition
+                for key in edge_programs.keys():
+                    all_processed_results[key].append(
+                        PreprocessResult(
+                            processed_bytes=bytes(qnn_context_binary),
+                            debug_handle_map={},
+                        )
                     )
-                )
+            elif len(ctx_binary_list) == len(edge_programs.values()):
+                for i, key in enumerate(edge_programs.keys()):
+                    all_processed_results[key].append(
+                        PreprocessResult(processed_bytes=ctx_binary_list[i])
+                    )
+            else:
+                raise RuntimeError("Hybrid compilation is not supported")

        return all_processed_results
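
Note: `_build_op_wrappers` now has two possible return types: raw `bytes` when the graph is just a preloaded QNN context binary, and a list of op wrappers otherwise. `preprocess_multimethod` then requires the whole partition to be one kind or the other. A small sketch of that all-or-nothing dispatch (a paraphrase with a hypothetical `classify` helper, not the file's code):

def classify(results_per_method):
    # bytes -> prebuilt QNN context binary; anything else -> op wrappers
    # that still need qnn_manager.Compile(...).
    n = len(results_per_method)
    wrappers = [r for r in results_per_method if not isinstance(r, bytes)]
    binaries = [r for r in results_per_method if isinstance(r, bytes)]
    if len(wrappers) == n:
        return "compile"          # share one compiled binary across methods
    if len(binaries) == n:
        return "reuse_binaries"   # one PreprocessResult per prebuilt binary
    raise RuntimeError("Hybrid compilation is not supported")

assert classify([b"ctx_a", b"ctx_b"]) == "reuse_binaries"
assert classify([["op1"], ["op2"]]) == "compile"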

benchmark.py

Lines changed: 5 additions & 5 deletions
@@ -52,7 +52,7 @@ def get_build_dir(backend):
 
 runner = {
     "qnn": f"{get_build_dir(backend)}/examples/qualcomm/executor_runner/qnn_executor_runner",
-    "xnn": f"{get_build_dir(backend)}/backends/xnnpack/xnn_executor_runner",
+    "xnn": f"{get_build_dir(backend)}/executor_runner",
 }
 artifacts = {
     "qnn": [

@@ -110,8 +110,8 @@ def get_cmds(backend, pte_path, iteration):
         " ".join(
             [
                 f"cd {workspace} &&",
-                "chmod +x ./xnn_executor_runner &&",
-                f"./xnn_executor_runner {' '.join(cmd_args[backend])}",
+                "chmod +x ./executor_runner &&",
+                f"./executor_runner {' '.join(cmd_args[backend])}",
             ]
         )
     ),

@@ -134,9 +134,9 @@ def get_cmds(backend, pte_path, iteration):
         " ".join(
             [
                 f"cd {workspace} &&",
-                "chmod +x ./xnn_executor_runner &&",
+                "chmod +x ./executor_runner &&",
                 f"chmod +x {memory_script_file} &&",
-                f"./{memory_script_file} ./xnn_executor_runner {' '.join(cmd_args[backend])}",
+                f"./{memory_script_file} ./executor_runner {' '.join(cmd_args[backend])}",
             ]
         )
     ),

build_xnnpack.sh

Lines changed: 1 addition & 0 deletions
@@ -29,6 +29,7 @@ cmake \
     -DANDROID_ABI='arm64-v8a' \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
     -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
     -DEXECUTORCH_BUILD_XNNPACK=ON \
     -DEXECUTORCH_ENABLE_LOGGING=ON \

examples/portable/executor_runner/executor_runner.cpp

Lines changed: 133 additions & 42 deletions
@@ -52,6 +52,11 @@ DEFINE_string(
     "model.pte",
     "Model serialized in flatbuffer format.");
 DEFINE_uint32(num_executions, 1, "Number of times to run the model.");
+DEFINE_string(input_list_path, "input_list.txt", "Model input list path.");
+DEFINE_string(
+    output_folder_path,
+    "outputs",
+    "Executorch inference data output path.");
 #ifdef ET_EVENT_TRACER_ENABLED
 DEFINE_string(etdump_path, "model.etdump", "Write ETDump data to this path.");
 #endif // ET_EVENT_TRACER_ENABLED

@@ -271,57 +276,143 @@ int main(int argc, char** argv) {
   // because inputs whose space gets reused by memory planning (if
   // any such inputs exist) will not be preserved for the next
   // execution.
-
-  ET_CHECK_MSG(
+  std::ifstream input_list(FLAGS_input_list_path);
+  if (input_list.is_open()) {
+    size_t num_inputs = method->inputs_size();
+    ET_LOG(Info, "Number of inputs: %zu", num_inputs);
+
+    auto split = [](std::string s, std::string delimiter) {
+      size_t pos_start = 0, pos_end, delim_len = delimiter.length();
+      std::string token;
+      std::vector<std::string> res;
+
+      while ((pos_end = s.find(delimiter, pos_start)) != std::string::npos) {
+        token = s.substr(pos_start, pos_end - pos_start);
+        pos_start = pos_end + delim_len;
+        res.push_back(token);
+      }
+      res.push_back(s.substr(pos_start));
+      return res;
+    };
+
+    std::string file_path;
+    int inference_index = 0;
+    double elapsed_time = 0;
+    while (std::getline(input_list, file_path)) {
+      auto input_files = split(file_path, " ");
+      if (input_files.size() == 0) {
+        break;
+      }
+      ET_CHECK_MSG(
+          input_files.size() == num_inputs,
+          "Number of inputs (%zu) mismatch with input files (%zu)",
+          num_inputs,
+          input_files.size());
+
+      std::vector<std::vector<char>> input_buf(num_inputs);
+      for (int input_index = 0; input_index < num_inputs; ++input_index) {
+        MethodMeta method_meta = method->method_meta();
+        Result<executorch::runtime::TensorInfo> tensor_meta =
+            method_meta.input_tensor_meta(input_index);
+
+        std::ifstream fin(input_files[input_index], std::ios::binary);
+        fin.seekg(0, fin.end);
+        size_t file_size = fin.tellg();
+
+        input_buf[input_index].resize(file_size);
+        fin.seekg(0, fin.beg);
+        fin.read(
+            static_cast<char*>(input_buf[input_index].data()),
+            file_size);
+        fin.close();
+
+        ET_CHECK_MSG(
+            file_size == tensor_meta->nbytes(),
+            "Input(%d) size mismatch. file bytes: %zu, tensor bytes: %zu",
+            input_index,
+            file_size,
+            tensor_meta->nbytes());
+
+        auto impl = executorch::aten::TensorImpl(
+            tensor_meta->scalar_type(),
+            /*dim=*/tensor_meta->sizes().size(),
+            const_cast<executorch::aten::TensorImpl::SizesType*>(tensor_meta->sizes().data()),
+            input_buf[input_index].data(),
+            const_cast<executorch::aten::TensorImpl::DimOrderType*>(
+                tensor_meta->dim_order().data()));
+        Error ret = method->set_input(executorch::aten::Tensor(&impl), input_index);
+        ET_CHECK_MSG(
+            ret == Error::Ok, "Failed to set input tensor: %d", (int)ret);
+      }
+      Error status = method->execute();
+      std::vector<EValue> outputs(method->outputs_size());
+      status = method->get_outputs(outputs.data(), method->outputs_size());
+      ET_CHECK(status == Error::Ok);
+      for (size_t output_index = 0; output_index < method->outputs_size();
+           output_index++) {
+        auto output_tensor = outputs[output_index].toTensor();
+        size_t nbytes = output_tensor.nbytes();
+        auto output_file_name = FLAGS_output_folder_path + "/output_" +
+            std::to_string(inference_index) + "_" +
+            std::to_string(output_index) + ".raw";
+        std::ofstream fout(output_file_name.c_str(), std::ios::binary);
+        fout.write(output_tensor.const_data_ptr<char>(), nbytes);
+        fout.close();
+      }
+      ++inference_index;
+    }
+  } else {
+    ET_CHECK_MSG(
       inputs.ok(),
       "Could not prepare inputs: 0x%" PRIx32,
       (uint32_t)inputs.error());
-  ET_LOG(Debug, "Inputs prepared.");
-  auto before_exec = std::chrono::high_resolution_clock::now();
-  Error status = method->execute();
-  auto after_exec = std::chrono::high_resolution_clock::now();
-  double interval_1st_infs =
-      std::chrono::duration_cast<std::chrono::microseconds>(
-          after_exec - before_exec)
-          .count() /
-      1000.0;
-  ET_CHECK_MSG(
-      status == Error::Ok,
-      "Execution of method %s failed with status 0x%" PRIx32,
-      method_name,
-      (uint32_t)status);
-
-  // Run the model.
-  before_exec = std::chrono::high_resolution_clock::now();
-  for (uint32_t i = 0; i < FLAGS_num_executions; i++) {
-    status = method->execute();
+    ET_LOG(Debug, "Inputs prepared.");
+
+    auto before_exec = std::chrono::high_resolution_clock::now();
+    Error status = method->execute();
+    auto after_exec = std::chrono::high_resolution_clock::now();
+    double interval_1st_infs =
+        std::chrono::duration_cast<std::chrono::microseconds>(
+            after_exec - before_exec)
+            .count() /
+        1000.0;
     ET_CHECK_MSG(
         status == Error::Ok,
         "Execution of method %s failed with status 0x%" PRIx32,
         method_name,
         (uint32_t)status);
-  }
-  after_exec = std::chrono::high_resolution_clock::now();
-  double interval_infs = std::chrono::duration_cast<std::chrono::microseconds>(
-                             after_exec - before_exec)
-                             .count() /
-      1000.0 / FLAGS_num_executions;
-
-  if (FLAGS_dump_statistics) {
-    auto output_file_name = "statistics.txt";
-    std::ofstream fout(output_file_name);
-    fout << "load: " + std::to_string(interval_load)
-         << "\n1st: " + std::to_string(interval_1st_infs)
-         << "\navg: " + std::to_string(interval_infs) << std::endl;
-    fout.close();
-  }
-  ET_LOG(Info, "Model executed successfully.");

-  if (tracer.get_event_tracer()) {
-    // Dump ETDump data containing profiling/debugging data to file specified in
-    // command line flag.
-    status = tracer.write_etdump_to_file();
-    ET_CHECK_MSG(status == Error::Ok, "Failed to save ETDump file.");
+    // Run the model.
+    before_exec = std::chrono::high_resolution_clock::now();
+    for (uint32_t i = 0; i < FLAGS_num_executions; i++) {
+      status = method->execute();
+      ET_CHECK_MSG(
+          status == Error::Ok,
+          "Execution of method %s failed with status 0x%" PRIx32,
+          method_name,
+          (uint32_t)status);
+    }
+    after_exec = std::chrono::high_resolution_clock::now();
+    double interval_infs = std::chrono::duration_cast<std::chrono::microseconds>(
+                               after_exec - before_exec)
+                               .count() /
+        1000.0 / FLAGS_num_executions;
+
+    if (FLAGS_dump_statistics) {
+      auto output_file_name = "statistics.txt";
+      std::ofstream fout(output_file_name);
+      fout << "load: " + std::to_string(interval_load)
+           << "\n1st: " + std::to_string(interval_1st_infs)
+           << "\navg: " + std::to_string(interval_infs) << std::endl;
+      fout.close();
+    }
+    ET_LOG(Info, "Model executed successfully.");
+    if (tracer.get_event_tracer()) {
+      // Dump ETDump data containing profiling/debugging data to file specified in
+      // command line flag.
+      status = tracer.write_etdump_to_file();
+      ET_CHECK_MSG(status == Error::Ok, "Failed to save ETDump file.");
+    }
   }
 
   return 0;
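
Note: the runner now reads `--input_list_path` line by line; each line holds one space-separated `.raw` file path per model input, each file's byte size must equal the corresponding input tensor's `nbytes()`, and outputs are written to `--output_folder_path` as `output_<inference>_<output>.raw`. A sketch of how such inputs might be produced on the host (a hypothetical helper, not part of this commit; assumes contiguous CPU tensors and native-endian raw bytes, file names are illustrative):

import torch

# One tuple per inference; one tensor per model input.
samples = [(torch.randn(1, 3, 224, 224),)]

with open("input_list.txt", "w") as f:
    for i, sample in enumerate(samples):
        names = []
        for j, t in enumerate(sample):
            name = f"input_{i}_{j}.raw"
            # Raw bytes on disk; size must match the input's nbytes().
            t.contiguous().numpy().tofile(name)
            names.append(name)
        # One line per inference, space-separated paths in input order.
        f.write(" ".join(names) + "\n")

The runner would then be invoked with something like `./executor_runner --model_path model.pte --input_list_path input_list.txt --output_folder_path outputs`; note the output folder must already exist, since the `std::ofstream` in the loop does not create directories.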

examples/qualcomm/oss_scripts/albert.py

Lines changed: 1 addition & 1 deletion
@@ -36,7 +36,7 @@ def main(args):
     data_size = 100
 
     model_name = "albert/albert-base-v2"
-    tokenizer = AutoTokenizer.from_pretrained(model_name, hidden_act="gelu")
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
 
     if args.ci:
         random_ids = torch.randint(low=0, high=100, size=(1, 100), dtype=torch.int32)
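
Note: the dropped kwarg appears to have been inert: `hidden_act` is a field of the ALBERT model config, not a tokenizer option, and `AutoTokenizer.from_pretrained` does not apply it to tokenization. If an activation override were ever wanted, it would land on the config instead, roughly (illustrative, not part of this commit):

from transformers import AlbertConfig

# Config kwargs override fields of the loaded config; this is where a
# "gelu" activation override would actually take effect.
config = AlbertConfig.from_pretrained("albert/albert-base-v2", hidden_act="gelu")
print(config.hidden_act)  # "gelu"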

examples/qualcomm/oss_scripts/dit.py

Lines changed: 25 additions & 10 deletions
@@ -30,28 +30,43 @@
 
 def get_rvlcdip_dataset(data_size):
     from datasets import load_dataset
+    from torch.utils.data import Dataset
 
-    dataset = load_dataset("nielsr/rvl_cdip_10_examples_per_class", split="train")
-    processor = AutoImageProcessor.from_pretrained(
-        "microsoft/dit-base-finetuned-rvlcdip"
-    )
+    def get_data_loader():
+        class DitDataset(Dataset):
+            def __init__(self, data_size) -> None:
+                self.data_size = data_size
+                self.dataset = self._get_dataset()
+                self.processor = AutoImageProcessor.from_pretrained("microsoft/dit-base-finetuned-rvlcdip")
+
+            def _get_dataset(self):
+                dataset = list(load_dataset("nielsr/rvl_cdip_10_examples_per_class", split="test"))
+                return dataset
+
+            def __getitem__(self, idx):
+                return (
+                    self.processor(images=self.dataset[idx]["image"].convert("RGB"), return_tensors="pt"),
+                    self.dataset[idx]["label"]
+                )
+
+            def __len__(self):
+                return len(self.dataset)
+
+        dataset = DitDataset(data_size)
+        torch.manual_seed(3407)
+        return torch.utils.data.DataLoader(dataset, batch_size=None, shuffle=True)
 
     # prepare input data
     inputs, targets, input_list = [], [], ""
-    for index, data in enumerate(dataset):
+    for index, (feature, target) in enumerate(get_data_loader()):
         if index >= data_size:
             break
-        feature, target = (
-            processor(images=data["image"].convert("RGB"), return_tensors="pt"),
-            data["label"],
-        )
         inputs.append((feature["pixel_values"],))
         targets.append(torch.tensor(target))
         input_list += f"input_{index}_0.raw\n"
 
     return inputs, targets, input_list
 
-
 def main(args):
     skip_node_id_set, skip_node_op_set = parse_skip_delegation_node(args)
