|
68 | 68 | */ |
69 | 69 | #define mod4(x) ((x) & 3) |
70 | 70 |
|
71 | | -/* |
72 | | - * Find the packed dimension of a tensor given its strides. The packed dimension |
73 | | - * is the "fastest moving" dimension which will have a stride of 1. |
74 | | - */ |
75 | | -int find_packed_dim(const ivec4 strides) { |
76 | | - int packed_dim = 0; |
77 | | - for (int i = 0; i <= 3; i++) { |
78 | | - if (strides[i] == 1) { |
79 | | - packed_dim = i; |
80 | | - break; |
81 | | - } |
82 | | - } |
83 | | - return packed_dim; |
84 | | -} |
85 | | - |
86 | 71 | /* |
87 | 72 | * Get the staging buffer indices that contain the data of the texel that |
88 | 73 | * corresponds to the provided tensor index. Since each texel has 4 elements, |
@@ -129,27 +114,26 @@ int tidx_to_nchwi(const ivec4 tidx, const ivec4 sizes) { |
129 | 114 | tidx.x; |
130 | 115 | } |
131 | 116 |
|
132 | | -// TODO(ssjia): make this function use dim order so that it can work with any |
133 | | -// dim order. Currently it assumes that the dim order is contiguous, except for |
134 | | -// the packed dim. |
135 | | -ivec4 bufi_to_tidx(int bufi, const ivec4 strides, const int packed_dim) { |
| 117 | +ivec4 bufi_to_tidx(int bufi, const ivec4 strides, const ivec4 dim_order) { |
136 | 118 | ivec4 idx; |
137 | 119 | for (int i = 3; i >= 0; i--) { |
138 | | - if (i != packed_dim) { |
139 | | - idx[i] = bufi / strides[i]; |
140 | | - bufi %= strides[i]; |
141 | | - } |
| 120 | + int dim = dim_order[i]; |
| 121 | + idx[dim] = bufi / strides[dim]; |
| 122 | + bufi %= strides[dim]; |
142 | 123 | } |
143 | | - idx[packed_dim] = bufi; |
144 | 124 | return idx; |
145 | 125 | } |
146 | 126 |
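The dim-order-aware rewrite is easier to follow with a worked example. Below is a minimal C++ sketch that mirrors the new `bufi_to_tidx`; the `ivec4` stand-in and the sample sizes/strides are illustrative assumptions, not part of this file. `dim_order` lists the tensor dims from innermost (stride 1) at position 0 to outermost at position 3, so iterating from position 3 down to 0 always divides by the largest remaining stride:

```cpp
#include <array>
#include <cassert>
#include <cstdio>

// Hypothetical stand-in for GLSL's ivec4; dims follow the WHCN convention
// used by these shaders (dim 0 = width, 1 = height, 2 = channels, 3 = batch).
using ivec4 = std::array<int, 4>;

// Mirror of the new dim-order-aware bufi_to_tidx: walk the dims from
// outermost to innermost, dividing out each stride in turn.
ivec4 bufi_to_tidx(int bufi, const ivec4& strides, const ivec4& dim_order) {
  ivec4 idx{};
  for (int i = 3; i >= 0; i--) {
    const int dim = dim_order[i];
    idx[dim] = bufi / strides[dim];
    bufi %= strides[dim];
  }
  return idx;
}

int main() {
  // Channels-packed tensor with sizes (W=4, H=3, C=2, N=1):
  // stride(C) = 1, stride(W) = 2, stride(H) = 8, stride(N) = 24, so the
  // dim order from innermost to outermost is C, W, H, N -> (2, 0, 1, 3).
  const ivec4 strides = {2, 8, 1, 24};
  const ivec4 dim_order = {2, 0, 1, 3};

  // 13 = 0*24 + 1*8 + 2*2 + 1*1, so it unpacks to (w=2, h=1, c=1, n=0).
  const ivec4 tidx = bufi_to_tidx(13, strides, dim_order);
  assert((tidx == ivec4{2, 1, 1, 0}));
  std::printf("tidx = (%d, %d, %d, %d)\n", tidx[0], tidx[1], tidx[2], tidx[3]);
}
```

Because the loop consumes strides in decreasing order, nothing needs to special-case the packed dim (or recover it via the removed `find_packed_dim`); any layout works as long as `dim_order` is sorted by decreasing stride from position 3 to position 0.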
|
147 | | -// Convenience overload of the above function, which will determine the packed |
148 | | -// dim from the strides automatically so it doesn't have to be passed in as a |
149 | | -// function argument. |
150 | | -ivec4 bufi_to_tidx(const int bufi, const ivec4 strides) { |
151 | | - int packed_dim = find_packed_dim(strides); |
152 | | - return bufi_to_tidx(bufi, strides, packed_dim); |
| 127 | +/* |
| 128 | + * Same as bufi_to_tidx, but assumes that the tensor is contiguous. |
| 129 | + */ |
| 130 | +ivec4 contiguous_bufi_to_tidx(int bufi, const ivec4 strides) { |
| 131 | + ivec4 idx; |
| 132 | + for (int i = 3; i >= 0; i--) { |
| 133 | + idx[i] = bufi / strides[i]; |
| 134 | + bufi %= strides[i]; |
| 135 | + } |
| 136 | + return idx; |
153 | 137 | } |
154 | 138 |
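The contiguous fast path is the same loop with the identity dim order baked in, so `contiguous_bufi_to_tidx(bufi, strides)` should agree with `bufi_to_tidx(bufi, strides, ivec4(0, 1, 2, 3))` whenever the strides are contiguous. A small C++ check of that equivalence, reusing the `ivec4` stand-in and `bufi_to_tidx` mirror from the sketch above (sizes and strides again illustrative):

```cpp
// Mirror of contiguous_bufi_to_tidx: identical to bufi_to_tidx with the
// identity dim order, i.e. dim 0 is the packed (stride 1) dim.
ivec4 contiguous_bufi_to_tidx(int bufi, const ivec4& strides) {
  ivec4 idx{};
  for (int i = 3; i >= 0; i--) {
    idx[i] = bufi / strides[i];
    bufi %= strides[i];
  }
  return idx;
}

// For a contiguous (W=4, H=3, C=2, N=1) tensor the strides are
// (1, 4, 12, 24); every buffer index should unpack identically either way.
void check_contiguous_equivalence() {
  const ivec4 strides = {1, 4, 12, 24};
  const ivec4 identity = {0, 1, 2, 3};
  for (int bufi = 0; bufi < 24; bufi++) {
    assert(contiguous_bufi_to_tidx(bufi, strides) ==
           bufi_to_tidx(bufi, strides, identity));
  }
}
```

Skipping the indirection through `dim_order` saves a dependent lookup per dim, which is presumably why the contiguous case keeps its own overload.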
|
155 | 139 | int tidx_to_bufi(const ivec4 tidx, ivec4 strides) { |
@@ -269,12 +253,22 @@ ivec3 lpos_to_pos(const ivec3 lpos, const ivec4 axis_map) { |
269 | 253 | * e.g. 0x11021, 1 -> ivec4(1, 2, 0, 1) |
270 | 254 | */ |
271 | 255 | #define unhash_axis_map(hash) \ |
272 | | - ivec4(hash & 0xf, (hash >> 4) & 0xf, (hash >> 8 & 0xf), (hash >> 12 & 0xf)) |
| 256 | + (ivec4(hash & 0xf, (hash >> 4) & 0xf, (hash >> 8) & 0xf, (hash >> 12) & 0xf)) |
| 257 | + |
| 258 | +/* |
| 259 | + * Unhash the dim order of a tensor. e.g. 0x03210 -> ivec4(0, 1, 2, 3) |
| 260 | + */ |
| 261 | +#define unhash_dim_order(hash) \ |
| 262 | + (ivec4(hash & 0xf, (hash >> 4) & 0xf, (hash >> 8) & 0xf, (hash >> 12) & 0xf)) |
273 | 263 |
|
274 | 264 | #define unhash_packed_dim(hash) int(hash >> 16 & 0xf) |
275 | 265 |
|
276 | 266 | #define DEFAULT_LAYOUT 0x02210 |
277 | 267 |
|
| 268 | +#define DEFAULT_DIM_ORDER 0x03210 |
| 269 | + |
| 270 | +#define DEFAULT_DIM_ORDER_IVEC4 ivec4(0, 1, 2, 3) |
| 271 | + |
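The `unhash_*` macros all read the same packed representation: one 4-bit value per nibble, lowest nibble first, with the packed dim one nibble above the ivec4 payload. A C++ rendering of the decoding, checked against the `0x11021` example from the axis-map comment and the new defaults (the helper names are mine, and `ivec4` is the stand-in from the first sketch):

```cpp
// Decode four 4-bit values, lowest nibble first. unhash_axis_map and
// unhash_dim_order share this exact bit layout in the GLSL macros.
ivec4 unhash_ivec4(int hash) {
  return {hash & 0xf, (hash >> 4) & 0xf, (hash >> 8) & 0xf, (hash >> 12) & 0xf};
}

// The packed dim lives in the fifth nibble, above the ivec4 payload.
int unhash_packed_dim(int hash) {
  return (hash >> 16) & 0xf;
}

void check_unhash() {
  // Example from the axis-map comment: 0x11021 -> ivec4(1, 2, 0, 1),
  // with packed dim 1 in the fifth nibble.
  assert((unhash_ivec4(0x11021) == ivec4{1, 2, 0, 1}));
  assert(unhash_packed_dim(0x11021) == 1);

  // DEFAULT_DIM_ORDER 0x03210 decodes to DEFAULT_DIM_ORDER_IVEC4,
  // the identity order (0, 1, 2, 3) of a contiguous tensor.
  assert((unhash_ivec4(0x03210) == ivec4{0, 1, 2, 3}));
}
```

Packing the layout into a single int presumably keeps it cheap to hand to a shader as one constant; `DEFAULT_DIM_ORDER` and `DEFAULT_DIM_ORDER_IVEC4` are just the same identity order in pre-hashed and pre-decoded form.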
278 | 272 | /************************ |
279 | 273 | * Deprecated Functions * |
280 | 274 | ************************/ |
|