Skip to content

Commit e4919b2

Browse files
author
pytorchbot
committed
2024-09-25 nightly release (3e79ea4)
1 parent 6878f08 commit e4919b2

29 files changed

+951
-147
lines changed

backends/vulkan/runtime/api/containers/Tensor.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -707,8 +707,7 @@ void vTensor::virtual_transpose(const int64_t dim0, const int64_t dim1) {
707707
const int dim1_whcn = sizes_.size() - 1 - dim1;
708708
if (packed_dim_ == dim0_whcn) {
709709
packed_dim_ = dim1_whcn;
710-
}
711-
if (packed_dim_ == dim1_whcn) {
710+
} else if (packed_dim_ == dim1_whcn) {
712711
packed_dim_ = dim0_whcn;
713712
}
714713

@@ -719,6 +718,12 @@ void vTensor::virtual_transpose(const int64_t dim0, const int64_t dim1) {
719718
VK_CHECK_COND(dim0_whcn < 3 && dim1_whcn < 3);
720719
std::iter_swap(
721720
axis_map_.begin() + dim0_whcn, axis_map_.begin() + dim1_whcn);
721+
// Update the "identity" of the concatenated dimension
722+
if (axis_map_.at(3) == dim0_whcn) {
723+
axis_map_.at(3) = dim1_whcn;
724+
} else if (axis_map_.at(3) == dim1_whcn) {
725+
axis_map_.at(3) = dim0_whcn;
726+
}
722727
}
723728
update_metadata();
724729
}

backends/vulkan/runtime/graph/ComputeGraph.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,32 @@ std::vector<int64_t> ComputeGraph::sizes_of(const ValueRef idx) const {
198198
VK_THROW("Could not get sizes of value with type ", val.type());
199199
}
200200

201+
int64_t ComputeGraph::dim_of(const ValueRef idx) const {
202+
const Value& val = values_.at(idx);
203+
if (val.isTensor()) {
204+
return val.toConstTensor().dim();
205+
} else if (val.isTensorRef()) {
206+
return val.toConstTensorRef().sizes.size();
207+
}
208+
VK_THROW("Could not get dim of value with type ", val.type());
209+
}
210+
211+
std::vector<int64_t> ComputeGraph::dim_order_of(const ValueRef idx) const {
212+
const Value& val = values_.at(idx);
213+
if (val.isTensor()) {
214+
return val.toConstTensor().dim_order();
215+
}
216+
VK_THROW("Could not get dim order of value with type ", val.type());
217+
}
218+
219+
std::vector<int64_t> ComputeGraph::strides_of(const ValueRef idx) const {
220+
const Value& val = values_.at(idx);
221+
if (val.isTensor()) {
222+
return val.toConstTensor().strides();
223+
}
224+
VK_THROW("Could not get strides of value with type ", val.type());
225+
}
226+
201227
vkapi::ScalarType ComputeGraph::dtype_of(const ValueRef idx) const {
202228
const Value& val = values_.at(idx);
203229
if (val.isTensor()) {

backends/vulkan/runtime/graph/ComputeGraph.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,12 @@ class ComputeGraph final {
282282
VK_THROW("Could not get sizes of value with type ", val.type());
283283
}
284284

285+
int64_t dim_of(const ValueRef idx) const;
286+
287+
std::vector<int64_t> dim_order_of(const ValueRef idx) const;
288+
289+
std::vector<int64_t> strides_of(const ValueRef idx) const;
290+
285291
vkapi::ScalarType dtype_of(const ValueRef idx) const;
286292

287293
inline const utils::ivec3& logical_limits_of(const ValueRef idx) const {

backends/vulkan/runtime/graph/ops/ExecuteNode.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,16 +21,16 @@ class ComputeGraph;
2121
* access permission.
2222
*/
2323
struct ArgGroup {
24-
ArgGroup(const ValueRef ref, const vkapi::MemoryAccessType access)
24+
ArgGroup(const ValueRef ref, const vkapi::MemoryAccessFlags access)
2525
: refs{ref}, access(access) {}
2626

2727
ArgGroup(
2828
const std::vector<ValueRef>& refs,
29-
const vkapi::MemoryAccessType access)
29+
const vkapi::MemoryAccessFlags access)
3030
: refs(refs), access(access) {}
3131

3232
const std::vector<ValueRef> refs;
33-
const vkapi::MemoryAccessType access;
33+
const vkapi::MemoryAccessFlags access;
3434
};
3535

3636
/*

backends/vulkan/runtime/graph/ops/glsl/copy_channel_offset.glsl

Lines changed: 21 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -20,20 +20,19 @@ ${layout_declare_tensor(0, "w", "t_out", DTYPE, STORAGE)}
2020
${layout_declare_tensor(1, "r", "existing_out", DTYPE, STORAGE)}
2121
${layout_declare_tensor(2, "r", "t_in", DTYPE, STORAGE)}
2222

23-
layout(set = 0, binding = 3) uniform PRECISION restrict CopyArgs {
24-
ivec4 out_sizes;
25-
ivec4 in_sizes;
23+
${layout_declare_ubo(3, "ivec4", "out_sizes")}
24+
${layout_declare_ubo(4, "ivec4", "out_axis_map")}
25+
${layout_declare_ubo(5, "ivec4", "in_sizes")}
26+
${layout_declare_ubo(6, "ivec4", "in_axis_map")}
27+
layout(set = 0, binding = 7) uniform PRECISION restrict CopyArgs {
28+
// Operates on (x, y, z) logical extents.
29+
ivec3 range;
2630
// Analogous to the range variable in copy. It defines the # of channels being
2731
// copied.
2832
int channel_range;
29-
int src_channel_offset;
30-
int dst_channel_offset;
31-
int unused;
32-
// Operates on (x, y, z) extents.
33-
ivec3 range;
34-
int unused1;
3533
ivec3 dst_offset;
36-
int unused2;
34+
int dst_channel_offset;
35+
int src_channel_offset;
3736
};
3837

3938
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
@@ -43,36 +42,36 @@ layout(constant_id = 3) const int packed_dim = C_DIM;
4342
void main() {
4443
// Note: Unlike other shaders, the range is often not equal to the destination
4544
// texture extent.
46-
const ivec3 pos = ivec3(gl_GlobalInvocationID);
47-
if (any(greaterThanEqual(pos, range))) {
45+
const ivec3 lpos = ivec3(gl_GlobalInvocationID);
46+
if (any(greaterThanEqual(lpos, range))) {
4847
return;
4948
}
5049

51-
const ivec3 out_pos = pos + dst_offset;
50+
const ivec3 out_lpos = lpos + dst_offset;
5251

53-
const ivec4 out_whcn = to_tensor_idx(out_pos, out_sizes, packed_dim);
52+
const ivec4 out_tidx = lpos_to_tidx(out_lpos, out_sizes, out_axis_map.w, packed_dim);
5453

5554
// First read the existing values to make sure the boundary values stay.
56-
VEC4_T v = VEC4_T(texelFetch(existing_out, out_pos, 0));
55+
VEC4_T v = load_texel_lpos(existing_out, out_lpos, out_axis_map);
5756

57+
ivec4 in_tidx = out_tidx;
5858
for (int i=0; i<4; i++) {
59-
ivec4 in_whcn = out_whcn;
6059

61-
in_whcn.z = out_whcn.z - dst_channel_offset + i;
60+
in_tidx[packed_dim] = out_tidx[packed_dim] - dst_channel_offset + i;
6261

6362
// Handle the partial update for the beginning of a channel in an existing tensor.
6463
// If the source channel index is below zero or exceeds the range, we skip
6564
// updating the element to avoid overwriting existing data.
66-
if ((in_whcn.z < 0) || (in_whcn.z >= channel_range)) {
65+
if ((in_tidx[packed_dim] < 0) || (in_tidx[packed_dim] >= channel_range)) {
6766
continue;
6867
}
6968

7069
// Readjust for the source offset.
71-
in_whcn.z = in_whcn.z + src_channel_offset;
70+
in_tidx[packed_dim] += src_channel_offset;
7271

73-
ivec4 in_elem_pos = to_texture_elem_pos(in_whcn, in_sizes, packed_dim);
74-
v[i] = VEC4_T(texelFetch(t_in, in_elem_pos.xyz, 0))[in_elem_pos.w];
72+
ivec4 in_posi = tidx_to_posi(in_tidx, in_sizes, in_axis_map, packed_dim);
73+
v[i] = load_texel(t_in, in_posi.xyz)[in_posi.w];
7574
}
7675

77-
imageStore(t_out, out_pos, v);
76+
write_texel_lpos(t_out, out_lpos, v, out_axis_map);
7877
}

backends/vulkan/runtime/graph/ops/impl/Copy.cpp

Lines changed: 13 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -139,28 +139,17 @@ void add_copy_channel_offset_node(
139139
uvec3 local_size = adaptive_work_group_size(global_size);
140140

141141
const struct Block final {
142-
utils::ivec4 out_sizes;
143-
utils::ivec4 in_sizes;
144-
int32_t channel_range;
145-
int32_t src_channel_offset;
146-
int32_t dst_channel_offset;
147-
int32_t unused;
148142
ivec3 range;
149-
int32_t unused1;
143+
int32_t channel_range;
150144
ivec3 dst_offset;
151-
int32_t unused2;
152-
145+
int32_t dst_channel_offset;
146+
int32_t src_channel_offset;
153147
} channel_offset_params{
154-
utils::make_whcn_ivec4(out_sizes),
155-
utils::make_whcn_ivec4(in_sizes),
156-
channel_range,
157-
src_channel_offset,
158-
dst_channel_offset,
159-
0,
160148
utils::make_ivec3(global_size),
161-
0,
149+
channel_range,
162150
dst_offset,
163-
0,
151+
dst_channel_offset,
152+
src_channel_offset,
164153
};
165154

166155
auto shader = VK_KERNEL_FROM_STR(kernel_name);
@@ -177,7 +166,13 @@ void add_copy_channel_offset_node(
177166
{in, vkapi::MemoryAccessType::READ},
178167
},
179168
// Parameter buffers
180-
{graph.create_params_buffer(channel_offset_params)},
169+
{
170+
t_out->sizes_ubo(),
171+
t_out->axis_map_ubo(),
172+
t_in->sizes_ubo(),
173+
t_in->axis_map_ubo(),
174+
graph.create_params_buffer(channel_offset_params),
175+
},
181176
// Specialization Constants
182177
{}));
183178
}

0 commit comments

Comments
 (0)