@@ -68,21 +68,6 @@
  */
 #define mod4(x) ((x) & 3)
 
-/*
- * Find the packed dimension of a tensor given its strides. The packed dimension
- * is the "fastest moving" dimension which will have a stride of 1.
- */
-int find_packed_dim(const ivec4 strides) {
-  int packed_dim = 0;
-  for (int i = 0; i <= 3; i++) {
-    if (strides[i] == 1) {
-      packed_dim = i;
-      break;
-    }
-  }
-  return packed_dim;
-}
-
 /*
  * Get the staging buffer indices that contain the data of the texel that
  * corresponds to the provided tensor index. Since the texel has 4 elements,
@@ -129,27 +114,26 @@ int tidx_to_nchwi(const ivec4 tidx, const ivec4 sizes) {
       tidx.x;
 }
 
-// TODO(ssjia): make this function use dim order so that it can work with any
-// dim order. Currently it assumes that the dim order is contiguous, except for
-// the packed dim.
-ivec4 bufi_to_tidx(int bufi, const ivec4 strides, const int packed_dim) {
+ivec4 bufi_to_tidx(int bufi, const ivec4 strides, const ivec4 dim_order) {
   ivec4 idx;
   for (int i = 3; i >= 0; i--) {
-    if (i != packed_dim) {
-      idx[i] = bufi / strides[i];
-      bufi %= strides[i];
-    }
+    int dim = dim_order[i];
+    idx[dim] = bufi / strides[dim];
+    bufi %= strides[dim];
   }
-  idx[packed_dim] = bufi;
   return idx;
 }
 
-// Convenience overload of the above function, which will determine the packed
-// dim from the strides automatically so it doesn't have to be passed in as a
-// function argument.
-ivec4 bufi_to_tidx(const int bufi, const ivec4 strides) {
-  int packed_dim = find_packed_dim(strides);
-  return bufi_to_tidx(bufi, strides, packed_dim);
+/*
+ * Version of bufi_to_tidx that assumes the tensor is contiguous.
+ */
+ivec4 contiguous_bufi_to_tidx(int bufi, const ivec4 strides) {
+  ivec4 idx;
+  for (int i = 3; i >= 0; i--) {
+    idx[i] = bufi / strides[i];
+    bufi %= strides[i];
+  }
+  return idx;
 }
 
 int tidx_to_bufi(const ivec4 tidx, ivec4 strides) {
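
A minimal C++ sketch (not part of this diff) of the updated index math in `bufi_to_tidx`: the function now walks `dim_order` from the slowest-moving dim (`dim_order[3]`) down to the fastest, dividing out the corresponding stride at each step, so it handles arbitrary dim orders instead of only "contiguous except for the packed dim" layouts; this is also what makes `find_packed_dim` removable above. The `ivec4` alias and the sample strides/dim order below are assumptions for illustration only.

```cpp
#include <array>
#include <cassert>

using ivec4 = std::array<int, 4>;  // stand-in for GLSL's ivec4 (assumption)

// Mirrors the new bufi_to_tidx: dim_order lists dims from fastest moving
// (dim_order[0], stride 1) to slowest (dim_order[3]), so iterating i = 3..0
// peels off the largest stride first, as div/mod decomposition requires.
ivec4 bufi_to_tidx(int bufi, const ivec4& strides, const ivec4& dim_order) {
  ivec4 idx{};
  for (int i = 3; i >= 0; i--) {
    const int dim = dim_order[i];
    idx[dim] = bufi / strides[dim];
    bufi %= strides[dim];
  }
  return idx;
}

// Mirrors tidx_to_bufi: a plain strided dot product.
int tidx_to_bufi(const ivec4& tidx, const ivec4& strides) {
  return tidx[0] * strides[0] + tidx[1] * strides[1] +
      tidx[2] * strides[2] + tidx[3] * strides[3];
}

int main() {
  // Hypothetical layout for sizes (x=4, y=3, z=2, w=1) where z moves fastest:
  // strides = (2, 8, 1, 24), i.e. fastest-to-slowest dim order (z, x, y, w).
  const ivec4 strides = {2, 8, 1, 24};
  const ivec4 dim_order = {2, 0, 1, 3};
  const ivec4 tidx = bufi_to_tidx(13, strides, dim_order);
  assert((tidx == ivec4{2, 1, 1, 0}));        // 13 = 2*2 + 1*8 + 1*1 + 0*24
  assert(tidx_to_bufi(tidx, strides) == 13);  // round trip recovers bufi
  return 0;
}
```

The deleted overload would have mis-decomposed this index, since it assumed every dim other than the packed one was contiguous.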
@@ -269,12 +253,22 @@ ivec3 lpos_to_pos(const ivec3 lpos, const ivec4 axis_map) {
  * e.g. 0x11021, 1 -> ivec4(1, 2, 0, 1)
  */
 #define unhash_axis_map(hash) \
-  ivec4(hash & 0xf, (hash >> 4) & 0xf, (hash >> 8 & 0xf), (hash >> 12 & 0xf))
+  (ivec4(hash & 0xf, (hash >> 4) & 0xf, (hash >> 8 & 0xf), (hash >> 12 & 0xf)))
+
+/*
+ * Unhashes the dim order from a layout hash, e.g. 0x03210 -> ivec4(0, 1, 2, 3).
+ */
+#define unhash_dim_order(hash) \
+  (ivec4(hash & 0xf, (hash >> 4) & 0xf, (hash >> 8 & 0xf), (hash >> 12 & 0xf)))
 
 #define unhash_packed_dim(hash) int(hash >> 16 & 0xf)
 
 #define DEFAULT_LAYOUT 0x02210
 
+#define DEFAULT_DIM_ORDER 0x03210
+
+#define DEFAULT_DIM_ORDER_IVEC4 ivec4(0, 1, 2, 3)
+
 /************************
  * Deprecated Functions *
  ************************/
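
Both unhash macros use the same bit layout: one element per nibble, least significant nibble first, with the packed dim in the fifth nibble. A small C++ sketch (not part of this diff) of that arithmetic, checking that the new `DEFAULT_DIM_ORDER` hash unpacks to `DEFAULT_DIM_ORDER_IVEC4`; the `ivec4` alias and the `unhash4` helper name are assumptions for illustration.

```cpp
#include <array>
#include <cassert>

using ivec4 = std::array<int, 4>;  // stand-in for GLSL's ivec4 (assumption)

// Same bit math as unhash_axis_map / unhash_dim_order: one value per nibble,
// least significant nibble first.
ivec4 unhash4(int hash) {
  return {
      hash & 0xf, (hash >> 4) & 0xf, (hash >> 8) & 0xf, (hash >> 12) & 0xf};
}

// Same bit math as unhash_packed_dim: the packed dim lives in nibble 4.
int unhash_packed_dim(int hash) {
  return (hash >> 16) & 0xf;
}

int main() {
  // DEFAULT_DIM_ORDER = 0x03210 unpacks to the contiguous dim order,
  // matching DEFAULT_DIM_ORDER_IVEC4 = ivec4(0, 1, 2, 3).
  assert((unhash4(0x03210) == ivec4{0, 1, 2, 3}));
  // DEFAULT_LAYOUT = 0x02210: the low nibbles unpack to ivec4(0, 1, 2, 2)
  // and the fifth nibble gives packed dim 0.
  assert((unhash4(0x02210) == ivec4{0, 1, 2, 2}));
  assert(unhash_packed_dim(0x02210) == 0);
  return 0;
}
```

Since `unhash_dim_order` and `unhash_axis_map` expand to identical expressions, the second macro exists purely for readability at call sites.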