
Commit faf1700 (2 parents: ce55ae8 + e0a7a1d)

Update

[ghstack-poisoned]

29 files changed: +370 −178 lines

backends/cadence/aot/memory_planning.py

Lines changed: 8 additions & 5 deletions
@@ -116,6 +116,9 @@ def plan_spec(
         Greedily place the spec in the first memory that can fit it.
         """
         for spec.mem_id in range(1, self.get_num_memories()):
+            if placement_constraints.is_mem_id_in_blocklist(spec, spec.mem_id):
+                # Skip placement for blocked memory id.
+                continue
             prev_offset, smallest_gap = 0, float("inf")
             for allocated_spec in state.allocated_buffers[spec.mem_id]:
                 if not Verifier.lifetime_overlap(spec, allocated_spec):
@@ -141,11 +144,11 @@ def plan_spec(
             )
             if spec.mem_offset is None:
                 spec.mem_offset = prev_offset
-                if not self.is_valid_placement(spec, placement_constraints):
-                    spec.mem_offset = None
-                    continue
-                else:
-                    spec.mem_offset = prev_offset
+
+            if not self.is_valid_placement(spec, placement_constraints):
+                # Skip placement for invalid memory id.
+                spec.mem_offset = None
+                continue
 
             state.place_spec(spec)
             # A data structure used for maintaining the tensor order
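
Note: a self-contained C++ sketch of the greedy first-fit strategy the docstring above describes, with the new blocklist check folded in. All names here are hypothetical stand-ins rather than the ExecuTorch API; lifetimes are simplified to integer intervals and `allocated[mem_id]` is assumed sorted by offset.

#include <algorithm>
#include <cstddef>
#include <optional>
#include <set>
#include <utility>
#include <vector>

struct Spec {
  size_t size;
  int start, end;  // lifetime interval of the buffer, inclusive
};

struct Placed {
  Spec spec;
  size_t offset;
};

static bool lifetime_overlap(const Spec& a, const Spec& b) {
  return a.start <= b.end && b.start <= a.end;
}

// Try each memory in order; skip blocklisted ids; within a memory, take the
// first offset that does not collide with any lifetime-overlapping buffer.
std::optional<std::pair<int, size_t>> plan_spec(
    const Spec& spec,
    const std::vector<size_t>& mem_sizes,               // capacity per mem_id
    const std::vector<std::vector<Placed>>& allocated,  // placements per mem_id
    const std::set<int>& blocklist) {
  for (int mem_id = 1; mem_id < static_cast<int>(mem_sizes.size()); ++mem_id) {
    if (blocklist.count(mem_id)) {
      continue;  // skip placement for blocked memory id
    }
    size_t offset = 0;
    for (const Placed& p : allocated[mem_id]) {
      if (!lifetime_overlap(spec, p.spec)) {
        continue;  // disjoint lifetimes may share addresses
      }
      if (p.offset >= offset + spec.size) {
        break;  // the gap before this buffer is large enough
      }
      offset = std::max(offset, p.offset + p.spec.size);
    }
    if (offset + spec.size <= mem_sizes[mem_id]) {
      return std::make_pair(mem_id, offset);
    }
  }
  return std::nullopt;  // nothing fits; the caller reports a MemoryError
}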

backends/cadence/aot/memory_planning_algo.py

Lines changed: 2 additions & 2 deletions
@@ -204,7 +204,7 @@ def _place_memory_id_pinned_specs(
                 for spec, c in spec_with_abs_constraint.items()
                 if c is not None and c.pinned_memory_id == mem_id and c.offset is None
             }
-            logging.error(f"Placing specs {mem_id_pinned_specs} for {mem_id=}")
+            logging.debug(f"Placing specs {mem_id_pinned_specs} for {mem_id=}")
 
             with self.block_memories_except(mem_id):
                 self.plan(
@@ -220,7 +220,7 @@ def _place_memory_id_pinned_specs(
             if constraint is None:
                 continue
 
-            logging.error(f"Placing spec {spec} with {constraint}")
+            logging.debug(f"Placing spec {spec} with {constraint}")
 
             if not state.is_placed(spec):
                 raise MemoryError(

backends/cadence/aot/tests/test_memory_passes.py

Lines changed: 1 addition & 1 deletion
@@ -1044,7 +1044,7 @@ class DummyMemIdBlockConstraintGen(PassBase):
     mul: blocks 1, 3
     """
 
-    def __init__(self, memory_constraints: MemoryConfig):
+    def __init__(self, memory_constraints: MemConstraints):
        self.memory_constraints = memory_constraints
 
    def call(self, graph_module: torch.fx.GraphModule) -> PassResult:

backends/cadence/fusion_g3/operators/op_clamp.cpp

Lines changed: 3 additions & 2 deletions
@@ -45,6 +45,7 @@ bool is_out_of_bounds(CTYPE_VAL val) {
 }
 
 ET_NODISCARD bool check_bounds(
+    KernelRuntimeContext& ctx,
     const Scalar& val_scalar,
     const ScalarType& val_type,
     const ScalarType& out_type,
@@ -107,14 +108,14 @@ Tensor& clamp_out(
   if (has_min) {
     ET_KERNEL_CHECK(
         ctx,
-        check_bounds(min_opt.value(), min_type, out_type, "minimum"),
+        check_bounds(ctx, min_opt.value(), min_type, out_type, "minimum"),
         InvalidArgument,
         out);
   }
   if (has_max) {
     ET_KERNEL_CHECK(
         ctx,
-        check_bounds(max_opt.value(), max_type, out_type, "maximum"),
+        check_bounds(ctx, max_opt.value(), max_type, out_type, "maximum"),
         InvalidArgument,
         out);
   }
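
Note: passing the kernel runtime context into check_bounds gives the helper a failure channel of its own, presumably so failures can be reported through the context rather than only via the return value. A minimal hypothetical sketch of that pattern; the names below are illustrative only, not the ExecuTorch API.

#include <cstdio>

struct KernelContext {
  bool failed = false;
  void fail(const char* what) {
    failed = true;
    std::fprintf(stderr, "kernel error: %s\n", what);
  }
};

// The context is the first parameter, mirroring the diff above, so the helper
// can record where and why validation failed.
[[nodiscard]] bool check_bounds(
    KernelContext& ctx, double val, double lo, double hi, const char* which) {
  if (val < lo || val > hi) {
    ctx.fail(which);
    return false;
  }
  return true;
}

int main() {
  KernelContext ctx;
  if (!check_bounds(ctx, 300.0, -128.0, 127.0, "minimum")) {
    return 1;  // propagate the failure, as an ET_KERNEL_CHECK-style macro would
  }
  return 0;
}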

examples/models/llama/runner/static_attention_io_manager.h

Lines changed: 10 additions & 0 deletions
@@ -576,6 +576,10 @@ class StaticAttentionIOManager {
     }
   }
 
+  size_t input_pos() const {
+    return input_pos_;
+  }
+
   /**
    * Prefill helper. Run multiple inferences as needed depending on the length
    * of the prompt and method's input length. Returns the position in the output
@@ -586,6 +590,7 @@ class StaticAttentionIOManager {
       executorch::runtime::Span<TokenT> tokens,
       executorch::runtime::Span<TokenT> input_buffer,
       executorch::runtime::Method& method) {
+    ET_LOG(Info, "Prefilling at position %zu", input_pos_);
     size_t input_len = input_buffer.size();
     auto& masks = get_mask(input_buffer.size());
     for (auto& pair : masks) {
@@ -621,6 +626,7 @@ class StaticAttentionIOManager {
       executorch::runtime::Method& method,
       std::function<TokenT(executorch::runtime::Method&)>& sample,
       std::function<bool(TokenT)>& token_callback) {
+    ET_LOG(Info, "Decoding at position %zu", input_pos_);
     set_input(method, 0, input_buffer.data());
     auto& masks = get_mask(input_buffer.size());
     for (auto& pair : masks) {
@@ -661,6 +667,10 @@ class StaticAttentionIOManager {
       size_t window_size,
       size_t n_verifications,
       std::unordered_map<TokenT, SuffixCache<TokenT>> suffix_caches) {
+    ET_LOG(
+        Info,
+        "Decoding with lookahead and verification at position %zu",
+        input_pos_);
     set_input(method, 0, input_buffer.data());
     size_t input_len = input_buffer.size();
 
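Note: the prefill helper's doc comment describes chunked prefill: a prompt longer than the method's fixed input length is run in input_len-sized chunks while the position advances after each inference. A rough free-standing C++ sketch of that loop; the names are hypothetical, not the actual class above.

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <vector>

using TokenT = int;

// Stand-in for one fixed-length inference; returns the tokens it consumed.
static size_t run_once(const TokenT* tokens, size_t n, size_t pos) {
  std::printf("inference at position %zu over %zu tokens\n", pos, n);
  return n;
}

// Returns the final position after prefilling the whole prompt.
size_t prefill(const std::vector<TokenT>& prompt, size_t input_len, size_t pos) {
  size_t consumed = 0;
  while (consumed < prompt.size()) {
    // Never feed more than the method's fixed input length at once.
    size_t chunk = std::min(input_len, prompt.size() - consumed);
    pos += run_once(prompt.data() + consumed, chunk, pos);
    consumed += chunk;
  }
  return pos;
}
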
exir/tests/test_remove_unused_parameters_pass.py

Lines changed: 1 addition & 1 deletion
@@ -196,7 +196,7 @@ def _test_pass_e2e(
 
         self.assertEqual(1, len(runtime_outputs))
         self.assertTrue(
-            torch.allclose(runtime_outputs[0], eager_outputs, atol=2e-6),
+            torch.allclose(runtime_outputs[0], eager_outputs, atol=1e-5),
             "Values out of tolerance.\n"
             + f" Strict: {strict}, ToEdge: {use_to_edge}, Delegate: {delegate}.\n"
             + f" Eager: {eager_outputs}.\n"
extension/apple/ExecuTorch/Exported/ExecuTorchTensor.mm

Lines changed: 21 additions & 3 deletions
@@ -265,9 +265,15 @@ - (NSString *)description {
   auto const count = _tensor->numel();
   os << "\n count: " << count << ",";
   os << "\n scalars: [";
+  // Create a minimal context for error handling in ET_SWITCH
+  struct {
+    [[noreturn]] void fail(torch::executor::Error /* error */) {
+      ET_CHECK_MSG(false, "Unsupported dtype in description");
+    }
+  } ctx;
   ET_SWITCH_REALHBBF16_TYPES(
       static_cast<ScalarType>(_tensor->scalar_type()),
-      nullptr,
+      ctx,
       "description",
       CTYPE,
       [&] {
@@ -488,9 +494,15 @@ - (instancetype)initWithScalars:(NSArray<NSNumber *> *)scalars
                  "Number of scalars does not match the shape");
   std::vector<uint8_t> data;
   data.resize(count * ExecuTorchSizeOfDataType(dataType));
+  // Create a minimal context for error handling in ET_SWITCH
+  struct {
+    [[noreturn]] void fail(torch::executor::Error /* error */) {
+      ET_CHECK_MSG(false, "Unsupported dtype in initWithScalars");
+    }
+  } ctx;
   for (NSUInteger index = 0; index < count; ++index) {
     ET_SWITCH_REALHBBF16_AND_UINT_TYPES(
-        static_cast<ScalarType>(dataType), nil, "initWithScalars", CTYPE, [&] {
+        static_cast<ScalarType>(dataType), ctx, "initWithScalars", CTYPE, [&] {
          reinterpret_cast<CTYPE *>(data.data())[index] = utils::toType<CTYPE>(scalars[index]);
        }
    );
@@ -801,8 +813,14 @@ + (instancetype)fullTensorWithShape:(NSArray<NSNumber *> *)shape
                            dataType:(ExecuTorchDataType)dataType
                      shapeDynamism:(ExecuTorchShapeDynamism)shapeDynamism {
   Scalar fillValue;
+  // Create a minimal context for error handling in ET_SWITCH
+  struct {
+    [[noreturn]] void fail(torch::executor::Error /* error */) {
+      ET_CHECK_MSG(false, "Unsupported dtype in fullTensor");
+    }
+  } ctx;
   ET_SWITCH_REALHBBF16_AND_UINT_TYPES(
-      static_cast<ScalarType>(dataType), nil, "fullTensor", CTYPE, [&] {
+      static_cast<ScalarType>(dataType), ctx, "fullTensor", CTYPE, [&] {
        fillValue = utils::toType<CTYPE>(scalar);
      }
  );
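
Note: the recurring ctx struct gives the ET_SWITCH macros an object whose fail(Error) method is invoked on an unsupported dtype, replacing the old nullptr/nil placeholder. A self-contained C++ sketch of a dispatcher that delegates failure handling to a caller-supplied context in the same way; the names are hypothetical and this is not the real ET_SWITCH implementation.

#include <cstdio>
#include <cstdlib>

enum class ScalarType { Float, Int, Unsupported };
enum class Error { InvalidType };

// Dispatch on dtype; on an unsupported type, delegate the failure to the
// caller-provided context instead of deciding the policy here.
template <typename Ctx, typename Fn>
void switch_real_types(ScalarType t, Ctx&& ctx, Fn&& fn) {
  switch (t) {
    case ScalarType::Float:
      fn(float{});
      break;
    case ScalarType::Int:
      fn(int{});
      break;
    default:
      ctx.fail(Error::InvalidType);  // caller decides: abort, log, or return
  }
}

int main() {
  // Minimal context mirroring the diff: failure is fatal here, but a kernel
  // runtime context could instead record the error and return to the caller.
  struct {
    [[noreturn]] void fail(Error /* error */) {
      std::fprintf(stderr, "Unsupported dtype\n");
      std::abort();
    }
  } ctx;

  switch_real_types(ScalarType::Float, ctx, [](auto v) {
    std::printf("dispatched on a value of size %zu\n", sizeof(v));
  });
  return 0;
}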

extension/llm/runner/text_decoder_runner.h

Lines changed: 9 additions & 1 deletion
@@ -68,12 +68,20 @@ class ET_EXPERIMENTAL TextDecoderRunner {
       const executorch::aten::Tensor& logits_tensor,
       const float temperature = 0.0f) {
     int32_t result = 0;
+
+    // Create a minimal context for error handling in ET_SWITCH
+    struct {
+      [[noreturn]] void fail(torch::executor::Error /* error */) {
+        ET_CHECK_MSG(false, "Unsupported dtype in logits_to_token");
+      }
+    } ctx;
+
     ET_SWITCH_THREE_TYPES(
         Float,
         Half,
         BFloat16,
         logits_tensor.scalar_type(),
-        unused,
+        ctx,
         "logits_to_token",
         CTYPE,
         [&]() {
extension/tensor/tensor_ptr.h

Lines changed: 9 additions & 1 deletion
@@ -111,7 +111,15 @@ inline TensorPtr make_tensor_ptr(
       runtime::canCast(deduced_type, type),
       "Cannot cast deduced type to specified type.");
   std::vector<uint8_t> casted_data(data.size() * runtime::elementSize(type));
-  ET_SWITCH_REALHBBF16_TYPES(type, nullptr, "make_tensor_ptr", CTYPE, [&] {
+
+  // Create a minimal context for error handling in ET_SWITCH
+  struct {
+    [[noreturn]] void fail(torch::executor::Error /* error */) {
+      ET_CHECK_MSG(false, "Unsupported dtype in make_tensor_ptr");
+    }
+  } ctx;
+
+  ET_SWITCH_REALHBBF16_TYPES(type, ctx, "make_tensor_ptr", CTYPE, [&] {
     std::transform(
         data.begin(),
         data.end(),
extension/tensor/tensor_ptr_maker.cpp

Lines changed: 16 additions & 2 deletions
@@ -89,7 +89,14 @@ TensorPtr random_strided(
       empty_strided(std::move(sizes), std::move(strides), type, dynamism);
   std::default_random_engine gen{std::random_device{}()};
 
-  ET_SWITCH_REALHBBF16_TYPES(type, nullptr, "random_strided", CTYPE, [&] {
+  // Create a minimal context for error handling in ET_SWITCH
+  struct {
+    [[noreturn]] void fail(torch::executor::Error /* error */) {
+      ET_CHECK_MSG(false, "Unsupported dtype in random_strided");
+    }
+  } ctx;
+
+  ET_SWITCH_REALHBBF16_TYPES(type, ctx, "random_strided", CTYPE, [&] {
    std::generate_n(tensor->mutable_data_ptr<CTYPE>(), tensor->numel(), [&]() {
      return static_cast<CTYPE>(distribution(gen));
    });
@@ -124,7 +131,14 @@ TensorPtr full_strided(
     executorch::aten::TensorShapeDynamism dynamism) {
   auto tensor =
       empty_strided(std::move(sizes), std::move(strides), type, dynamism);
-  ET_SWITCH_REALHBBF16_TYPES(type, nullptr, "full_strided", CTYPE, [&] {
+  // Create a minimal context for error handling in ET_SWITCH
+  struct {
+    [[noreturn]] void fail(torch::executor::Error /* error */) {
+      ET_CHECK_MSG(false, "Unsupported data type in full_strided");
+    }
+  } ctx;
+
+  ET_SWITCH_REALHBBF16_TYPES(type, ctx, "full_strided", CTYPE, [&] {
     CTYPE value;
     ET_EXTRACT_SCALAR(fill_value, value);
     std::fill(