Skip to content

Commit 30bdd3e

Browse files
committed
Update on "raise error when trying to save an etrecord missing essential info"
As titled. Differential Revision: [D79687142](https://our.internmc.facebook.com/intern/diff/D79687142/) [ghstack-poisoned]
2 parents 13e9bc4 + 9663d40 commit 30bdd3e

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

65 files changed

+992
-857
lines changed

.ci/scripts/test_model.sh

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,9 @@ test_model_with_qnn() {
199199
EXPORT_SCRIPT=albert
200200
elif [[ "${MODEL_NAME}" == "bert" ]]; then
201201
EXPORT_SCRIPT=bert
202+
elif [[ "${MODEL_NAME}" == "conv_former" ]]; then
203+
EXPORT_SCRIPT=conv_former
204+
EXTRA_FLAGS="--dataset imagenet-mini/val"
202205
elif [[ "${MODEL_NAME}" == "cvt" ]]; then
203206
EXPORT_SCRIPT=cvt
204207
elif [[ "${MODEL_NAME}" == "distilbert" ]]; then
@@ -238,7 +241,7 @@ test_model_with_qnn() {
238241
"cvt"|"dit"|"focalnet"|"mobilevit_v2"|"pvt"|"swin")
239242
SCRIPT_FOLDER=oss_scripts
240243
;;
241-
"albert"|"bert"|"distilbert"|"roberta"|"efficientnet"|"mobilevit_v1")
244+
"albert"|"bert"|"conv_former"|"distilbert"|"roberta"|"efficientnet"|"mobilevit_v1")
242245
pip install evaluate
243246
SCRIPT_FOLDER=oss_scripts
244247
# 16bit models will encounter op validation fail on some operations,

.github/workflows/trunk.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -568,7 +568,7 @@ jobs:
568568
strategy:
569569
matrix:
570570
dtype: [fp32]
571-
model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l]
571+
model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l, conv_former]
572572
fail-fast: false
573573
with:
574574
runner: linux.2xlarge

backends/qualcomm/tests/test_qnn_delegate.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4588,6 +4588,65 @@ def test_static_qwen2_5(self):
45884588
msg["inference_speed"], inference_speed_ref[self.model]
45894589
)
45904590

4591+
def test_qwen3(self):
4592+
if not self.required_envs():
4593+
self.skipTest("missing required envs")
4594+
4595+
prompt = "My favourite condiment is "
4596+
cmds = [
4597+
"python",
4598+
f"{self.executorch_root}/examples/qualcomm/oss_scripts/llama/llama.py",
4599+
"--artifact",
4600+
self.artifact_dir,
4601+
"--build_folder",
4602+
self.build_folder,
4603+
"--model",
4604+
self.model,
4605+
"--ip",
4606+
self.ip,
4607+
"--port",
4608+
str(self.port),
4609+
"--prompt",
4610+
f"{prompt}",
4611+
"--ptq",
4612+
"16a8w",
4613+
"--decoder_model",
4614+
"qwen3_0.6b",
4615+
"--model_mode",
4616+
"hybrid",
4617+
"--prefill_ar_len",
4618+
"32",
4619+
"--max_seq_len",
4620+
"128",
4621+
]
4622+
if self.compile_only:
4623+
cmds.extend(["--compile_only"])
4624+
elif self.device:
4625+
cmds.extend(["--device", self.device])
4626+
if self.host:
4627+
cmds.extend(["--host", self.host])
4628+
elif self.enable_x86_64:
4629+
cmds.extend(["--enable_x86_64"])
4630+
if self.pre_gen_pte:
4631+
cmds.extend(["--pre_gen_pte", self.pre_gen_pte])
4632+
4633+
# Accuracy is bad for now. Just check user's prompt is returned.
4634+
golden_start_with = "My favourite condiment is "
4635+
p = subprocess.Popen(cmds, stdout=subprocess.DEVNULL)
4636+
with Listener((self.ip, self.port)) as listener:
4637+
conn = listener.accept()
4638+
p.communicate()
4639+
msg = json.loads(conn.recv())
4640+
if "Error" in msg:
4641+
self.fail(msg["Error"])
4642+
else:
4643+
model_out = msg["result"][0]
4644+
self.assertTrue(
4645+
model_out.startswith(golden_start_with),
4646+
f"Expected Output: {golden_start_with}. Actual Output: {model_out}",
4647+
)
4648+
self.assertGreaterEqual(msg["inference_speed"], 70) # Lanai
4649+
45914650

45924651
class TestExampleOssScript(TestQNN):
45934652
def test_albert(self):

backends/vulkan/runtime/VulkanBackend.cpp

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -390,18 +390,20 @@ bool maybe_resize_input(
390390
const size_t input_i,
391391
executorch::aten::Tensor& et_tensor) {
392392
ValueRef in_tensor_ref = graph->inputs()[input_i].value;
393-
vTensorPtr in_tensor = graph->get_tensor(in_tensor_ref);
393+
394+
const std::vector<int64_t> in_tensor_vk_sizes =
395+
graph->sizes_of(in_tensor_ref);
394396

395397
ET_CHECK_MSG(
396-
et_tensor.dim() == in_tensor->sizes().size(),
398+
et_tensor.dim() == in_tensor_vk_sizes.size(),
397399
"Cannot resize input tensor: old ndim %zu does not match new ndim %zu",
398-
static_cast<size_t>(in_tensor->sizes().size()),
400+
static_cast<size_t>(in_tensor_vk_sizes.size()),
399401
static_cast<size_t>(et_tensor.dim()));
400402

401403
bool should_resize = false;
402404
std::vector<int64_t> new_sizes(et_tensor.dim());
403405
for (size_t i = 0; i < et_tensor.dim(); i++) {
404-
if (in_tensor->sizes()[i] != et_tensor.sizes()[i]) {
406+
if (in_tensor_vk_sizes[i] != et_tensor.sizes()[i]) {
405407
should_resize = true;
406408
}
407409
new_sizes.at(i) = et_tensor.sizes()[i];
@@ -411,10 +413,11 @@ bool maybe_resize_input(
411413
graph->resize_input(input_i, new_sizes);
412414
}
413415

416+
const size_t in_tensor_vk_numel = graph->numel_of(in_tensor_ref);
414417
ET_CHECK_MSG(
415-
in_tensor->numel() == et_tensor.numel(),
418+
in_tensor_vk_numel == et_tensor.numel(),
416419
"Vulkan tensor numel %zu does not match ET tensor numel %zu",
417-
static_cast<size_t>(in_tensor->numel()),
420+
static_cast<size_t>(in_tensor_vk_numel),
418421
static_cast<size_t>(et_tensor.numel()));
419422

420423
return should_resize;
@@ -445,12 +448,14 @@ void maybe_resize_output(
445448
const size_t output_i,
446449
executorch::aten::Tensor& et_tensor) {
447450
ValueRef out_tensor_ref = graph->outputs()[output_i].value;
448-
vTensorPtr out_tensor = graph->get_tensor(out_tensor_ref);
451+
452+
const std::vector<int64_t> out_tensor_vk_sizes =
453+
graph->sizes_of(out_tensor_ref);
449454

450455
executorch::aten::SizesType new_output_size[kTensorDimensionLimit];
451-
size_t ndim = out_tensor->sizes().size();
456+
size_t ndim = out_tensor_vk_sizes.size();
452457
for (int i = 0; i < ndim; ++i) {
453-
new_output_size[i] = out_tensor->sizes()[i];
458+
new_output_size[i] = out_tensor_vk_sizes[i];
454459
}
455460

456461
executorch::aten::ArrayRef<executorch::aten::SizesType> output_size{

backends/vulkan/runtime/graph/ComputeGraph.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -704,6 +704,38 @@ utils::uvec3 ComputeGraph::create_local_wg_size(const ValueRef idx) {
704704
return create_local_wg_size(create_global_wg_size(idx));
705705
}
706706

707+
void ComputeGraph::bind_tensor_to_descriptor_set(
708+
const ValueRef ref,
709+
vkapi::PipelineBarrier& pipeline_barrier,
710+
const vkapi::MemoryAccessFlags access_type,
711+
vkapi::DescriptorSet& descriptor_set,
712+
const uint32_t idx) {
713+
vTensorPtr tensor = get_tensor(ref);
714+
if (tensor->buffer()) {
715+
vkapi::VulkanBuffer& buffer = tensor->buffer(
716+
pipeline_barrier, vkapi::PipelineStage::COMPUTE, access_type);
717+
descriptor_set.bind(idx, buffer);
718+
} else {
719+
vkapi::VulkanImage& image = tensor->image(
720+
pipeline_barrier, vkapi::PipelineStage::COMPUTE, access_type);
721+
descriptor_set.bind(idx, image);
722+
}
723+
}
724+
725+
void ComputeGraph::bind_value_to_descriptor_set(
726+
const ValueRef ref,
727+
vkapi::PipelineBarrier& pipeline_barrier,
728+
const vkapi::MemoryAccessFlags access_type,
729+
vkapi::DescriptorSet& descriptor_set,
730+
const uint32_t idx) {
731+
if (val_is_tensor(ref)) {
732+
bind_tensor_to_descriptor_set(
733+
ref, pipeline_barrier, access_type, descriptor_set, idx);
734+
} else if (val_is_staging(ref)) {
735+
descriptor_set.bind(idx, get_staging(ref)->buffer());
736+
}
737+
}
738+
707739
void ComputeGraph::copy_into_staging(
708740
const ValueRef idx,
709741
const void* data,
@@ -891,6 +923,17 @@ void ComputeGraph::execute() {
891923
execute_count_++;
892924
}
893925

926+
void ComputeGraph::virtual_clone(const ValueRef dst, const ValueRef src) {
927+
get_tensor(dst)->virtual_clone(*get_tensor(src));
928+
}
929+
930+
void ComputeGraph::virtual_transpose(
931+
const ValueRef tensor,
932+
const int64_t dim0,
933+
const int64_t dim1) {
934+
get_tensor(tensor)->virtual_transpose(dim0, dim1);
935+
}
936+
894937
void ComputeGraph::resize_input(
895938
const int64_t idx,
896939
const std::vector<int64_t>& new_sizes) {

backends/vulkan/runtime/graph/ComputeGraph.h

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,16 @@ class ComputeGraph final {
248248
return values_.at(idx).is##type_name(); \
249249
}
250250

251-
GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(vTensorPtr, tensor, Tensor)
251+
protected:
252+
inline vTensorPtr get_tensor(const ValueRef idx) {
253+
return vTensorPtr(this, idx);
254+
}
255+
256+
public:
257+
inline bool val_is_tensor(const ValueRef idx) const {
258+
return values_.at(idx).isTensor();
259+
}
260+
252261
GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(TensorRefPtr, tref, TensorRef)
253262
GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(StagingPtr, staging, Staging)
254263
GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(IntListPtr, int_list, IntList)
@@ -319,6 +328,10 @@ class ComputeGraph final {
319328
return values_.at(idx).toConstTensor().numel();
320329
}
321330

331+
inline size_t staging_buffer_numel_of(const ValueRef idx) const {
332+
return values_.at(idx).toConstTensor().staging_buffer_numel();
333+
}
334+
322335
inline utils::StorageType storage_type_of(const ValueRef idx) const {
323336
return values_.at(idx).toConstTensor().storage_type();
324337
}
@@ -832,6 +845,20 @@ class ComputeGraph final {
832845
*/
833846
utils::uvec3 create_local_wg_size(const ValueRef idx);
834847

848+
void bind_tensor_to_descriptor_set(
849+
const ValueRef ref,
850+
vkapi::PipelineBarrier& pipeline_barrier,
851+
const vkapi::MemoryAccessFlags accessType,
852+
vkapi::DescriptorSet& descriptor_set,
853+
const uint32_t idx);
854+
855+
void bind_value_to_descriptor_set(
856+
const ValueRef ref,
857+
vkapi::PipelineBarrier& pipeline_barrier,
858+
const vkapi::MemoryAccessFlags access_type,
859+
vkapi::DescriptorSet& descriptor_set,
860+
const uint32_t idx);
861+
835862
//
836863
// Input/Output
837864
//
@@ -890,14 +917,27 @@ class ComputeGraph final {
890917

891918
void execute();
892919

920+
//
921+
// Tensor View
922+
//
923+
924+
void virtual_clone(const ValueRef dst, const ValueRef src);
925+
926+
void virtual_transpose(
927+
const ValueRef tensor,
928+
const int64_t dim0,
929+
const int64_t dim1);
930+
893931
//
894932
// Dynamic Shape support
895933
//
896934

897935
void resize_input(const int64_t idx, const std::vector<int64_t>& new_sizes);
936+
898937
void virtual_resize(
899938
const ValueRef idx,
900939
const std::vector<int64_t>& new_sizes);
940+
901941
void propagate_resize();
902942

903943
//
@@ -939,6 +979,8 @@ class ComputeGraph final {
939979
friend class SymIntPtr;
940980

941981
friend struct TmpTensor;
982+
friend struct SharedObject;
983+
friend class BlitNode;
942984
};
943985

944986
template <typename T>

backends/vulkan/runtime/graph/ops/BlitNode.cpp

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,9 @@ BlitNode::BlitNode(
2626
}
2727

2828
void BlitNode::encode(ComputeGraph* graph) {
29-
auto src_tensor = graph->get_tensor(src_);
30-
auto dst_tensor = graph->get_tensor(dst_);
3129
VK_CHECK_COND(
32-
src_tensor->storage_type() != utils::kBuffer &&
33-
dst_tensor->storage_type() != utils::kBuffer,
30+
graph->storage_type_of(src_) != utils::kBuffer &&
31+
graph->storage_type_of(dst_) != utils::kBuffer,
3432
"BlitNode: Only texture backed tensors are supported.");
3533

3634
api::Context* const context = graph->context();
@@ -41,18 +39,18 @@ void BlitNode::encode(ComputeGraph* graph) {
4139
// Hack to get timing data for non shader op
4240
std::string kernel_name("Blit_");
4341
kernel_name.reserve(32);
44-
kernel_name += vkapi::to_string(src_tensor->dtype());
42+
kernel_name += vkapi::to_string(graph->dtype_of(src_));
4543
kernel_name += "_to_";
46-
kernel_name += vkapi::to_string(dst_tensor->dtype());
44+
kernel_name += vkapi::to_string(graph->dtype_of(dst_));
4745

4846
context->report_shader_dispatch_start(
4947
kernel_name, utils::uvec3(), utils::WorkgroupSize(), node_id_);
5048

5149
context->register_blit(
5250
pipeline_barrier,
53-
src_tensor->image(
51+
graph->get_tensor(src_)->image(
5452
pipeline_barrier, vkapi::PipelineStage::TRANSFER, vkapi::kRead),
55-
dst_tensor->image(
53+
graph->get_tensor(dst_)->image(
5654
pipeline_barrier, vkapi::PipelineStage::TRANSFER, vkapi::kWrite));
5755

5856
context->report_shader_dispatch_end();

backends/vulkan/runtime/graph/ops/PrepackNode.cpp

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,8 @@ namespace vkcompute {
1818

1919
vkapi::ShaderInfo get_noop_shader(ComputeGraph& graph, const ValueRef packed) {
2020
std::string noop_shader_name("no_op");
21-
vTensorPtr t_packed = graph.get_tensor(packed);
22-
add_dtype_suffix(noop_shader_name, *t_packed);
23-
add_storage_type_suffix(noop_shader_name, *t_packed);
21+
add_dtype_suffix(noop_shader_name, graph.dtype_of(packed));
22+
add_storage_type_suffix(noop_shader_name, graph.storage_type_of(packed));
2423
return VK_KERNEL_FROM_STR(noop_shader_name);
2524
}
2625

@@ -48,13 +47,13 @@ PrepackNode::PrepackNode(
4847
}
4948

5049
api::StagingBuffer PrepackNode::create_staging_buffer(ComputeGraph* graph) {
51-
vTensorPtr packed = graph->get_tensor(packed_);
52-
53-
// If no TensorRef is provided, create a staging buffer of zeros according to
54-
// the vkapi::vTensor metadata.
50+
// If no TensorRef is provided, create a staging buffer of zeros based on the
51+
// Tensor metadata.
5552
if (graph->val_is_none(tref_)) {
56-
size_t numel = utils::multiply_integers(packed->sizes());
57-
api::StagingBuffer staging(graph->context(), packed->dtype(), numel);
53+
const std::vector<int64_t> packed_sizes = graph->sizes_of(packed_);
54+
size_t numel = utils::multiply_integers(packed_sizes);
55+
api::StagingBuffer staging(
56+
graph->context(), graph->dtype_of(packed_), numel);
5857
staging.set_staging_zeros();
5958
return staging;
6059
}
@@ -80,7 +79,6 @@ void PrepackNode::encode(ComputeGraph* graph) {
8079

8180
context->check_device_capabilities(shader_);
8281

83-
vTensorPtr packed = graph->get_tensor(packed_);
8482
api::StagingBuffer staging = create_staging_buffer(graph);
8583

8684
std::unique_lock<std::mutex> cmd_lock = context->dispatch_lock();
@@ -101,8 +99,8 @@ void PrepackNode::encode(ComputeGraph* graph) {
10199
shader_, local_workgroup_size_, spec_vars_, push_constants_offset);
102100

103101
uint32_t idx = 0;
104-
bind_tensor_to_descriptor_set(
105-
*packed,
102+
graph->bind_tensor_to_descriptor_set(
103+
packed_,
106104
pipeline_barrier,
107105
vkapi::MemoryAccessType::WRITE,
108106
descriptor_set,
@@ -128,8 +126,8 @@ void PrepackNode::encode(ComputeGraph* graph) {
128126
vkapi::DescriptorSet descriptor_set = context->get_descriptor_set(
129127
noop_shader_, utils::WorkgroupSize(1, 1, 1));
130128

131-
bind_tensor_to_descriptor_set(
132-
*packed,
129+
graph->bind_tensor_to_descriptor_set(
130+
packed_,
133131
pipeline_barrier,
134132
vkapi::MemoryAccessType::READ,
135133
descriptor_set,

0 commit comments

Comments
 (0)