Skip to content

Commit 928bfa8

Browse files
author
pytorchbot
committed
2024-09-19 nightly release (ad95e46)
1 parent 56da94e commit 928bfa8

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+1020
-454
lines changed

CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,8 @@ option(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL "Build the Runner Util extension"
183183

184184
option(EXECUTORCH_BUILD_EXTENSION_TENSOR "Build the Tensor extension" OFF)
185185

186+
option(EXECUTORCH_BUILD_EXTENSION_TRAINING "Build the training extension" OFF)
187+
186188
option(EXECUTORCH_BUILD_GTESTS "Build googletest based test binaries" OFF)
187189

188190
option(EXECUTORCH_BUILD_MPS "Build the MPS backend" OFF)
@@ -636,6 +638,10 @@ if(EXECUTORCH_BUILD_EXTENSION_MODULE)
636638
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/module)
637639
endif()
638640

641+
if(EXECUTORCH_BUILD_EXTENSION_TRAINING)
642+
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/training)
643+
endif()
644+
639645
if(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL)
640646
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/runner_util)
641647
endif()

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ please visit our documentation website [for the latest release](https://pytorch.
2222

2323
Check out the [Getting Started](https://pytorch.org/executorch/stable/getting-started-setup.html#quick-setup-colab-jupyter-notebook-prototype) page for a quick spin.
2424

25+
Check out the examples of [Llama](./examples/models/llama2/README.md), [Llava](./examples/models/llava/README.md) and [other models](./examples/README.md) running on edge devices using ExecuTorch.
26+
2527
## Feedback
2628

2729
We welcome any feedback, suggestions, and bug reports from the community to help

backends/vulkan/runtime/api/containers/Tensor.cpp

Lines changed: 14 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -418,14 +418,12 @@ vTensor::vTensor(
418418
padded_sizes_{calculate_padded_sizes(sizes, memory_layout_)},
419419
unsqueezed_strides_{unsqueeze_strides(strides_, numel_)},
420420
padded_numel_(utils::multiply_integers(padded_sizes_)),
421-
texture_limits_{{0, 0, 0}},
422421
logical_limits_{{0, 0, 0}},
423422
// Utility Uniform Buffers that can be passed to shaders as arguments
424423
sizes_uniform_(),
425424
strides_uniform_(),
426425
numel_uniform_(),
427426
axis_map_uniform_(),
428-
texture_limits_uniform_(),
429427
logical_limits_uniform_(),
430428
// Construct Tensor storage
431429
storage_(
@@ -440,12 +438,7 @@ vTensor::vTensor(
440438
dim_order_is_valid(dim_order_), "computed dim order is invalid");
441439

442440
if (storage_type != utils::kBuffer) {
443-
texture_limits_.limits = utils::ivec3{
444-
utils::safe_downcast<int32_t>(storage_.image_extents_[0]),
445-
utils::safe_downcast<int32_t>(storage_.image_extents_[1]),
446-
utils::safe_downcast<int32_t>(storage_.image_extents_[2])};
447-
448-
update_logical_limits();
441+
set_logical_limits(storage_.image_extents_);
449442
}
450443

451444
if (dtype == vkapi::kHalf) {
@@ -470,14 +463,12 @@ vTensor::vTensor(const vTensor& other)
470463
other.unsqueezed_strides_.begin(),
471464
other.unsqueezed_strides_.end()},
472465
padded_numel_(other.padded_numel_),
473-
texture_limits_{other.texture_limits_},
474466
logical_limits_{other.logical_limits_},
475467
// Empty initialize Utility Uniform Buffers
476468
sizes_uniform_(),
477469
strides_uniform_(),
478470
numel_uniform_(),
479471
axis_map_uniform_(),
480-
texture_limits_uniform_(),
481472
logical_limits_uniform_(),
482473
// Copy Tensor storage
483474
storage_(other.storage_) {}
@@ -498,14 +489,12 @@ vTensor::vTensor(
498489
padded_sizes_{calculate_padded_sizes(sizes, memory_layout_)},
499490
unsqueezed_strides_{unsqueeze_strides(strides_, numel_)},
500491
padded_numel_(utils::multiply_integers(padded_sizes_)),
501-
texture_limits_{other.texture_limits_},
502492
logical_limits_(other.logical_limits_),
503493
// Empty initialize Utility Uniform Buffers
504494
sizes_uniform_(),
505495
strides_uniform_(),
506496
numel_uniform_(),
507497
axis_map_uniform_(),
508-
texture_limits_uniform_(),
509498
logical_limits_uniform_(),
510499
// Copy Tensor storage
511500
storage_(other.storage_, vkapi::element_size(dtype_) * offset_numel) {
@@ -547,18 +536,10 @@ vkapi::VulkanBuffer& vTensor::buffer(
547536
return storage_.buffer_;
548537
}
549538

550-
void vTensor::update_logical_limits() {
551-
logical_limits_.limits[0] = texture_limits_.limits[axis_map_.at(0)];
552-
logical_limits_.limits[1] = texture_limits_.limits[axis_map_.at(1)];
553-
logical_limits_.limits[2] = texture_limits_.limits[axis_map_.at(2)];
554-
}
555-
556-
utils::uvec3 vTensor::logical_extents() const {
557-
utils::uvec3 logical_extents(
558-
{utils::safe_downcast<uint32_t>(logical_limits_.limits[0]),
559-
utils::safe_downcast<uint32_t>(logical_limits_.limits[1]),
560-
utils::safe_downcast<uint32_t>(logical_limits_.limits[2])});
561-
return logical_extents;
539+
void vTensor::set_logical_limits(const utils::uvec3& image_extents) {
540+
logical_limits_.limits[0] = image_extents[axis_map_.at(0)];
541+
logical_limits_.limits[1] = image_extents[axis_map_.at(1)];
542+
logical_limits_.limits[2] = image_extents[axis_map_.at(2)];
562543
}
563544

564545
const vkapi::BufferBindInfo vTensor::sizes_ubo() {
@@ -585,13 +566,6 @@ const vkapi::BufferBindInfo vTensor::axis_map_ubo() {
585566
return vkapi::BufferBindInfo(axis_map_uniform_.buffer());
586567
}
587568

588-
const vkapi::BufferBindInfo vTensor::texture_limits_ubo() {
589-
if (!texture_limits_uniform_.buffer()) {
590-
texture_limits_uniform_ = ParamsBuffer(storage_.context_, texture_limits_);
591-
}
592-
return vkapi::BufferBindInfo(texture_limits_uniform_.buffer());
593-
}
594-
595569
const vkapi::BufferBindInfo vTensor::logical_limits_ubo() {
596570
if (!logical_limits_uniform_.buffer()) {
597571
logical_limits_uniform_ = ParamsBuffer(storage_.context_, logical_limits_);
@@ -655,18 +629,10 @@ void vTensor::update_metadata() {
655629
unsqueezed_strides_ = unsqueeze_strides(strides_, numel_);
656630
padded_numel_ = utils::multiply_integers(padded_sizes_);
657631

658-
// Calculate the extents of the image texture that would have been required
659-
// for a tensor of the new sizes.
660-
utils::uvec3 virtual_extents =
661-
calculate_image_extents(padded_sizes_, axis_map_, memory_layout_);
662-
663-
// Update the texture limits to reflect the new virtual extents.
664-
texture_limits_.limits = utils::ivec3{
665-
utils::safe_downcast<int32_t>(virtual_extents[0]),
666-
utils::safe_downcast<int32_t>(virtual_extents[1]),
667-
utils::safe_downcast<int32_t>(virtual_extents[2])};
668-
669-
update_logical_limits();
632+
// Calculate the image extents that would have been used to allocate a texture
633+
// with the current sizes, and use that to set the logical limits.
634+
set_logical_limits(
635+
calculate_image_extents(padded_sizes_, axis_map_, memory_layout_));
670636

671637
if (sizes_uniform_.buffer()) {
672638
sizes_uniform_.update(utils::make_whcn_ivec4(sizes_));
@@ -680,9 +646,6 @@ void vTensor::update_metadata() {
680646
if (axis_map_uniform_.buffer()) {
681647
axis_map_uniform_.update(utils::make_ivec4(axis_map_));
682648
}
683-
if (texture_limits_uniform_.buffer()) {
684-
texture_limits_uniform_.update(texture_limits_);
685-
}
686649
if (logical_limits_uniform_.buffer()) {
687650
logical_limits_uniform_.update(logical_limits_);
688651
}
@@ -695,9 +658,11 @@ void vTensor::check_sizes(const std::vector<int64_t>& sizes) const {
695658
utils::uvec3 virtual_extents =
696659
calculate_image_extents(padded_sizes_, axis_map_, memory_layout_);
697660

698-
bool valid_resize = virtual_extents[0] <= image_extents()[0];
699-
valid_resize = valid_resize && virtual_extents[1] <= image_extents()[1];
700-
valid_resize = valid_resize && virtual_extents[2] <= image_extents()[2];
661+
bool valid_resize = virtual_extents[0] <= storage_.image_extents_[0];
662+
valid_resize =
663+
valid_resize && virtual_extents[1] <= storage_.image_extents_[1];
664+
valid_resize =
665+
valid_resize && virtual_extents[2] <= storage_.image_extents_[2];
701666

702667
VK_CHECK_COND(
703668
valid_resize,

backends/vulkan/runtime/api/containers/Tensor.h

Lines changed: 22 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -276,9 +276,7 @@ class vTensor final {
276276
// Contains the number of elements in the tensor according to the padded
277277
// sizes.
278278
size_t padded_numel_;
279-
// See the comments documenting image_extents() for more context.
280-
TextureLimits texture_limits_;
281-
// See the comments documenting logical_extents() for more context.
279+
// See the comments documenting logical_limits() for more context.
282280
TextureLimits logical_limits_;
283281

284282
/*
@@ -294,7 +292,6 @@ class vTensor final {
294292
ParamsBuffer strides_uniform_;
295293
ParamsBuffer numel_uniform_;
296294
ParamsBuffer axis_map_uniform_;
297-
ParamsBuffer texture_limits_uniform_;
298295
ParamsBuffer logical_limits_uniform_;
299296

300297
vTensorStorage storage_;
@@ -342,28 +339,30 @@ class vTensor final {
342339
return storage_.storage_type_ == utils::kBuffer;
343340
}
344341

345-
/*
346-
* Returns the raw image extents of the underlying image texture used to store
347-
* the tensor's data. Note that due to axis mapping, the X, Y, and Z extents
348-
* may not correspond to the width, height, or channels dimension of the
349-
* tensor.
350-
*/
351-
inline const utils::uvec3& image_extents() const {
352-
return storage_.image_extents_;
353-
}
354-
355342
private:
356-
void update_logical_limits();
343+
void set_logical_limits(const utils::uvec3& image_extents);
357344

358345
public:
359346
/*
360-
* Returns the image extents of the underlying image texture, but re-ordered
361-
* such that the first element is the extent of the axis used to represent the
362-
* tensor's width dimension, the second element is the extent of the axis used
363-
* to represent the tensor's height dimension, and the third element is the
364-
* extent of the axis used to represent the tensor's channels dimension.
347+
* The logical limits of the tensor are derived from the image extents of the
348+
* image texture used to store the tensor, but with two key differences.
349+
*
350+
* First, the image extents are permuted according to the axis map. This
351+
* makes it so that the first element of the logical limit is the limit of the
352+
* texture axis corresponding to the width dimension of the tensor, the next
353+
* element is the limit of the texture axis corresponding to the height
354+
* dimension and the last element is the limit of the texture axis that
355+
* corresponds to the channels dimension of the tensor.
356+
*
357+
* Second, the logical limits may use smaller extents than the actual image
358+
* extents of the image texture. This is due to dynamic shape; if the tensor's
359+
* `virtual_resize()` function is called, then the logical limits will reflect
360+
* the extents that would be needed to support a tensor with the updated sizes
361+
* instead of the original sizes.
365362
*/
366-
utils::uvec3 logical_extents() const;
363+
inline const utils::ivec3& logical_limits() const {
364+
return logical_limits_.limits;
365+
}
367366

368367
/*
369368
* Extract a `vkapi::ScalarType` from the TensorOptions member
@@ -430,18 +429,8 @@ class vTensor final {
430429
const vkapi::BufferBindInfo axis_map_ubo();
431430

432431
/*
433-
* Returns a GPU buffer containing the virtual image extents of the tensor.
434-
* Since a tensor can be resized with the virtual_resize() function, this
435-
* GPU buffer contains the image extents of the tensor calculated using the
436-
* virtual_resize() function. This allows shaders to exit early if they are
437-
* working outside the limits of the texture.
438-
*/
439-
const vkapi::BufferBindInfo texture_limits_ubo();
440-
441-
/*
442-
* Returns a GPU buffer containing the logical image extents of the tensor.
443-
* It contains the same data as texture_limits_ubo(), but with the data
444-
* re-ordered. See the comments for logical_extents() for more context.
432+
* Returns a GPU buffer containing the logical limits of the tensor. See the
433+
* comments for logical_limits() for more context.
445434
*/
446435
const vkapi::BufferBindInfo logical_limits_ubo();
447436

@@ -450,10 +439,6 @@ class vTensor final {
450439
*/
451440
const vkapi::BufferBindInfo numel_ubo();
452441

453-
inline const utils::ivec3 texture_limits() const {
454-
return texture_limits_.limits;
455-
}
456-
457442
inline size_t numel() const {
458443
return numel_;
459444
}

backends/vulkan/runtime/graph/ComputeGraph.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -428,7 +428,7 @@ utils::uvec3 ComputeGraph::create_global_wg_size(const ValueRef idx) {
428428
if (is_buffer_storage(idx)) {
429429
return {uint32_t(numel_of(idx)), 1u, 1u};
430430
}
431-
return image_extents_of(idx);
431+
return logical_limits_of(idx);
432432
}
433433

434434
utils::uvec3 ComputeGraph::create_local_wg_size(

backends/vulkan/runtime/graph/ComputeGraph.h

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -284,12 +284,8 @@ class ComputeGraph final {
284284

285285
vkapi::ScalarType dtype_of(const ValueRef idx) const;
286286

287-
inline utils::uvec3 image_extents_of(const ValueRef idx) const {
288-
return values_.at(idx).toConstTensor().image_extents();
289-
}
290-
291-
inline utils::uvec3 logical_extents_of(const ValueRef idx) const {
292-
return values_.at(idx).toConstTensor().logical_extents();
287+
inline const utils::ivec3& logical_limits_of(const ValueRef idx) const {
288+
return values_.at(idx).toConstTensor().logical_limits();
293289
}
294290

295291
inline int32_t numel_of(const ValueRef idx) const {
@@ -335,10 +331,6 @@ class ComputeGraph final {
335331
return values_.at(idx).toTensor().axis_map_ubo();
336332
}
337333

338-
inline vkapi::BufferBindInfo texture_limits_ubo(const ValueRef idx) {
339-
return values_.at(idx).toTensor().texture_limits_ubo();
340-
}
341-
342334
inline vkapi::BufferBindInfo logical_limits_ubo(const ValueRef idx) {
343335
return values_.at(idx).toTensor().logical_limits_ubo();
344336
}

backends/vulkan/runtime/graph/ops/glsl/activations.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ float hardswish(float x) {
1818

1919
vec4 hardswish(vec4 tex) {
2020
return vec4(
21-
hardswish(tex.x), hardswish(tex.y), hardswish(tex.z), hardswish(tex.z));
21+
hardswish(tex.x), hardswish(tex.y), hardswish(tex.z), hardswish(tex.w));
2222
}
2323

2424
float hardshrink(float x, float lambda, float neg_lambda) {

backends/vulkan/runtime/graph/ops/impl/BatchNorm.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ void add_native_batch_norm_node(
8888
{{out_ref, vkapi::MemoryAccessType::WRITE},
8989
{{in_ref, arg_weight, arg_bias, arg_mean, arg_var},
9090
vkapi::MemoryAccessType::READ}},
91-
{t_out->texture_limits_ubo(),
91+
{t_out->logical_limits_ubo(),
9292
graph.create_params_buffer(epsilon),
9393
graph.create_params_buffer(num_texel_per_batch)}));
9494
}

backends/vulkan/runtime/graph/ops/impl/Cat.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ void add_cat_default_node(
4040

4141
for (ValueRef input_ref : *input_list) {
4242
vTensorPtr t_in = graph.get_tensor(input_ref);
43-
utils::ivec3 range = t_in->texture_limits();
43+
utils::ivec3 range = t_in->logical_limits();
4444
add_copy_offset_node(
4545
graph, input_ref, range, src_offset, dst_offset, out);
4646
dst_offset[0] += range[0];
@@ -52,7 +52,7 @@ void add_cat_default_node(
5252

5353
for (ValueRef input_ref : *input_list) {
5454
vTensorPtr t_in = graph.get_tensor(input_ref);
55-
utils::ivec3 range = t_in->texture_limits();
55+
utils::ivec3 range = t_in->logical_limits();
5656
add_copy_offset_node(
5757
graph, input_ref, range, src_offset, dst_offset, out);
5858
dst_offset[1] += range[1];
@@ -63,7 +63,7 @@ void add_cat_default_node(
6363

6464
for (ValueRef input_ref : *input_list) {
6565
vTensorPtr t_in = graph.get_tensor(input_ref);
66-
utils::ivec3 range = t_in->texture_limits();
66+
utils::ivec3 range = t_in->logical_limits();
6767
add_copy_offset_node(
6868
graph, input_ref, range, src_offset, dst_offset, out);
6969
dst_offset[2] += range[2];

backends/vulkan/runtime/graph/ops/impl/Clone.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ void add_clone_node(
3232
graph.create_local_wg_size(out),
3333
{{out, vkapi::MemoryAccessType::WRITE},
3434
{in, vkapi::MemoryAccessType::READ}},
35-
{t_out->texture_limits_ubo()}));
35+
{t_out->logical_limits_ubo()}));
3636
}
3737

3838
void clone(ComputeGraph& graph, const std::vector<ValueRef>& args) {

0 commit comments

Comments
 (0)