Skip to content

Commit 08fe952

Browse files
committed
Update
1 parent baa41c6 commit 08fe952

File tree

4 files changed

+52
-26
lines changed

4 files changed

+52
-26
lines changed

backends/cuda/cuda_backend.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,8 @@ def preprocess(
151151
"aot_inductor.package_constants_in_so": False,
152152
# Store weight constants on disk in a binary blob
153153
"aot_inductor.package_constants_on_disk_format": "binary_blob",
154+
# Avoid issues like 'NoneType' object has no attribute 'reorder_iter_loops'
155+
"loop_ordering_after_fusion": False,
154156
# Enable maximum automatic tuning for optimal performance
155157
"max_autotune": True,
156158
# Use TRITON for GEMM (General Matrix Multiply) operations tuning only to avoid using operators in libtorch

backends/cuda/cuda_partitioner.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
PartitionResult,
1717
)
1818
from executorch.exir.backend.utils import tag_constant_data, tag_mutated_buffer
19+
from torch._export.utils import is_buffer, is_lifted_tensor_constant, is_param
1920
from torch.export.exported_program import ExportedProgram
2021

2122

@@ -56,6 +57,18 @@ def partition(self, exported_program: ExportedProgram) -> PartitionResult:
5657
tag_constant_data(exported_program)
5758
tag_mutated_buffer(exported_program)
5859

60+
# Tag constant placeholders that have no users
61+
# tag_constant_data only tags constants that have users with delegation_tag
62+
# but we need to tag all constants for this partition
63+
for node in exported_program.graph.nodes:
64+
if node.op == "placeholder" and (
65+
is_param(exported_program, node)
66+
or is_buffer(exported_program, node)
67+
or is_lifted_tensor_constant(exported_program, node)
68+
):
69+
if "delegation_tag" not in node.meta:
70+
node.meta["delegation_tag"] = tag
71+
5972
return PartitionResult(
6073
tagged_exported_program=exported_program, partition_tags=partition_tags
6174
)

extension/llm/runner/wav_loader.h

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -168,18 +168,29 @@ inline std::vector<float> load_wav_audio_data(const std::string& fp) {
168168
size_t data_offset = header->dataOffset;
169169
size_t data_size = header->Subchunk2Size;
170170
int bits_per_sample = header->bitsPerSample;
171+
int audio_format = header->AudioFormat;
171172

172173
std::vector<float> audio_data;
173174

174175
if (bits_per_sample == 32) {
175176
size_t num_samples = data_size / 4;
176177
audio_data.resize(num_samples);
177-
const int32_t* input_buffer =
178-
reinterpret_cast<const int32_t*>(data + data_offset);
179178

180-
for (size_t i = 0; i < num_samples; ++i) {
181-
audio_data[i] = static_cast<float>(
182-
static_cast<double>(input_buffer[i]) * kOneOverIntMax);
179+
if (audio_format == 3) {
180+
// IEEE float format - read directly as floats
181+
const float* input_buffer =
182+
reinterpret_cast<const float*>(data + data_offset);
183+
for (size_t i = 0; i < num_samples; ++i) {
184+
audio_data[i] = input_buffer[i];
185+
}
186+
} else {
187+
// PCM integer format - normalize from int32
188+
const int32_t* input_buffer =
189+
reinterpret_cast<const int32_t*>(data + data_offset);
190+
for (size_t i = 0; i < num_samples; ++i) {
191+
audio_data[i] = static_cast<float>(
192+
static_cast<double>(input_buffer[i]) * kOneOverIntMax);
193+
}
183194
}
184195
} else if (bits_per_sample == 16) {
185196
size_t num_samples = data_size / 2;

extension/tensor/tensor_ptr.cpp

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -79,27 +79,27 @@ TensorPtr make_tensor_ptr(
7979
});
8080
}
8181
}
82-
std::vector<executorch::aten::StridesType> computed_strides(dim);
83-
84-
auto error = runtime::dim_order_to_stride(
85-
sizes.data(), dim_order.data(), dim, computed_strides.data());
86-
ET_CHECK_MSG(error == runtime::Error::Ok, "Failed to compute strides.");
87-
88-
if (!strides.empty()) {
89-
for (size_t i = 0; i < dim; i++) {
90-
ET_CHECK_MSG(
91-
strides[i] == computed_strides[i] || sizes[i] == 1,
92-
"invalid strides for dim %zu: %" ET_PRI_SIZES_AND_STRIDES
93-
"!= %" ET_PRI_SIZES_AND_STRIDES
94-
" while its size is %" ET_PRI_SIZES_AND_STRIDES " != 1",
95-
i,
96-
strides[i],
97-
computed_strides[i],
98-
sizes[i]);
99-
}
100-
}
101-
102-
strides = std::move(computed_strides);
82+
// std::vector<executorch::aten::StridesType> computed_strides(dim);
83+
84+
// auto error = runtime::dim_order_to_stride(
85+
// sizes.data(), dim_order.data(), dim, computed_strides.data());
86+
// ET_CHECK_MSG(error == runtime::Error::Ok, "Failed to compute strides.");
87+
88+
// if (!strides.empty()) {
89+
// for (size_t i = 0; i < dim; i++) {
90+
// ET_CHECK_MSG(
91+
// strides[i] == computed_strides[i] || sizes[i] == 1,
92+
// "invalid strides for dim %zu: %" ET_PRI_SIZES_AND_STRIDES
93+
// "!= %" ET_PRI_SIZES_AND_STRIDES
94+
// " while its size is %" ET_PRI_SIZES_AND_STRIDES " != 1",
95+
// i,
96+
// strides[i],
97+
// computed_strides[i],
98+
// sizes[i]);
99+
// }
100+
// }
101+
102+
// strides = std::move(computed_strides);
103103

104104
#ifndef USE_ATEN_LIB
105105
executorch::aten::TensorImpl tensor_impl(

0 commit comments

Comments (0)