|
41 | 41 | */ |
// Rounds x up to the next multiple of 4. The argument is parenthesized so
// that expression arguments (e.g. `a | b`, `a << b`) expand with the intended
// precedence.
#define alignup4(x) (((x) + 3) & -4)
43 | 43 |
|
/*
 * Input: (W, H, C, N) strides of a tensor
 * Returns: the WHCN index of the fastest moving dimension, i.e. the lowest
 *          index whose stride equals 1. Falls back to 0 when no stride is 1.
 */
int find_packed_dim(const ivec4 strides) {
  for (int dim = 0; dim < 4; ++dim) {
    if (strides[dim] == 1) {
      return dim;
    }
  }
  return 0;
}
| 58 | + |
44 | 59 | // |
45 | 60 | // (w, h, c, n) Tensor Index <-> Contiguous Buffer Index Conversion |
46 | 61 | // |
@@ -74,27 +89,49 @@ ivec4 from_nchw_buffer_i(int buf_i, ivec4 sizes) { |
74 | 89 | (buf_i / (sizes.x * sizes.y * sizes.z))); |
75 | 90 | } |
76 | 91 |
|
/*
 * Input: (w, h, c, n) tensor index, (W, H, C, N) sizes of the tensor
 * Returns: the linear index of that element in a contiguous NCHW buffer
 */
int to_nchw_buffer_i(const ivec4 tensor_idx, const ivec4 sizes) {
  // Element counts spanned by one step along h, c and n respectively.
  int row_stride = sizes.x;
  int plane_stride = row_stride * sizes.y;
  int batch_stride = plane_stride * sizes.z;

  return tensor_idx.w * batch_stride + tensor_idx.z * plane_stride +
      tensor_idx.y * row_stride + tensor_idx.x;
}
| 96 | + |
/*
 * Input: buffer index, (W, H, C, N) strides of a tensor, and the WHCN index
 *        of the packed dim
 * Returns: The (w, h, c, n) tensor index corresponding to the buffer element
 */
ivec4 to_tensor_idx(int buffer_id, const ivec4 strides, const int packed_dim) {
  ivec4 idx;
  int remaining = buffer_id;
  // Peel off the non-packed dims from n down to w; whatever is left over
  // after that is the coordinate along the packed dim.
  for (int dim = 3; dim >= 0; dim--) {
    if (dim == packed_dim) {
      continue;
    }
    idx[dim] = remaining / strides[dim];
    remaining %= strides[dim];
  }
  idx[packed_dim] = remaining;
  return idx;
}
94 | 113 |
|
95 | | -int to_texel_idx(const ivec4 texel_pos, ivec4 strides) { |
96 | | - return texel_pos.x * strides.x + texel_pos.y * strides.y + |
97 | | - texel_pos.z * strides.z + texel_pos.w * strides.w; |
/*
 * Input: buffer index, (W, H, C, N) strides of a tensor
 * Returns: The (w, h, c, n) tensor index corresponding to the buffer element
 *
 * Convenience overload for callers that do not know the packed dim: it is
 * looked up as the first dimension with a stride of 1 and then forwarded to
 * the three-argument version. The lookup adds some overhead, so prefer the
 * three-argument version when the packed dim is already known and performance
 * matters.
 */
ivec4 to_tensor_idx(int buffer_id, const ivec4 strides) {
  return to_tensor_idx(buffer_id, strides, find_packed_dim(strides));
}
| 127 | + |
/*
 * Input: (w, h, c, n) tensor index, (W, H, C, N) strides of the tensor buffer
 * Returns: the buffer index corresponding to the specified tensor index
 */
int to_buffer_id(const ivec4 tensor_idx, ivec4 strides) {
  // Component-wise index * stride, summed in x, y, z, w order.
  ivec4 offsets = tensor_idx * strides;
  return offsets.x + offsets.y + offsets.z + offsets.w;
}
99 | 136 |
|
100 | 137 | // |
|
0 commit comments