@@ -44,23 +44,57 @@ void main() {
4444 return ;
4545 }
4646
47- // Starting offset to write at within a texel
48- const int out_lane_offset = dst_offset[packed_dim] & 0x3;
49- const bool has_lane_offset = out_lane_offset != 0 ;
50-
5147 // Position in input tensor
52- const ivec3 in_pos = pos + src_offset.xyz;
48+ ivec3 in_pos = pos + src_offset.xyz;
49+ in_pos[packed_dim] = pos[packed_dim] + (src_offset[packed_dim] >> 2 );
5350
5451 // Read input value mapping to this output texel
55- const VEC4_T in_value = load_texel_lpos(t_in, in_pos, in_axis_map);
52+ VEC4_T in_value = load_texel_lpos(t_in, in_pos, in_axis_map);
53+
54+ // Starting offset to read from a texel
55+ const int src_lane_offset = src_offset[packed_dim] & 0x3;
56+ const bool has_src_lane_offset = src_lane_offset != 0 ;
57+
58+ // If the input lane offset is non-zero, i.e. the packed texel is composed from multiple source texels
59+ if (has_src_lane_offset) {
60+ // Boundary values will come from next input texel in the packed dim.
61+ ivec3 next_in_pos = in_pos;
62+ next_in_pos[packed_dim] = in_pos[packed_dim] + 1 ;
63+ VEC4_T next_value = load_texel_lpos(t_in, next_in_pos, in_axis_map);
64+
65+ // Keep input values from the end of the current input texel, based on src_lane_offset:
66+ // offset 1 means the first lane of the current input texel is not part of the output texel,
67+ // offset 2 means the first 2 lanes are not, and so on.
68+ if (src_lane_offset == 1 ) {
69+ in_value.xyz = in_value.yzw;
70+ } else if (src_lane_offset == 2 ) {
71+ in_value.xy = in_value.zw;
72+ } else {
73+ in_value.x = in_value.w;
74+ }
75+ // Copy the next texel's values into the end of the input texel, based on src_lane_offset:
76+ // offset 1 means the first lane of the next texel is part of the input texel,
77+ // offset 2 means the first 2 lanes of the next texel are part of the input texel, and so on.
78+ if (src_lane_offset == 1 ) {
79+ in_value.w = next_value.x;
80+ } else if (src_lane_offset == 2 ) {
81+ in_value.zw = next_value.xy;
82+ } else {
83+ in_value.yzw = next_value.xyz;
84+ }
85+ }
86+
87+ // Starting offset to write at within a texel
88+ const int out_lane_offset = dst_offset[packed_dim] & 0x3;
89+ const bool has_dst_lane_offset = out_lane_offset != 0 ;
5690
5791 ivec3 out_pos = pos + dst_offset.xyz;
5892 out_pos[packed_dim] = pos[packed_dim] + (dst_offset[packed_dim] >> 2 );
5993
6094 VEC4_T out_value;
6195
6296 // If lane offset is non zero i.e packed texel is composed from multiple sources
63- if (has_lane_offset ) {
97+ if (has_dst_lane_offset ) {
6498 // When position in packed dim is > 0
6599 if (pos[packed_dim] > 0 ) {
66100 // Boundary values will come from previous input texel in the packed dim.
0 commit comments