@@ -44,23 +44,57 @@ void main() {
4444    return ;
4545  }
4646
47-   //  Starting offset to write at within a texel
48-   const  int  out_lane_offset =  dst_offset[packed_dim] &  0x3;
49-   const  bool  has_lane_offset =  out_lane_offset !=  0 ;
50- 
5147  //  Position in input tensor
52-   const  ivec3  in_pos =  pos +  src_offset.xyz;
48+   ivec3  in_pos =  pos +  src_offset.xyz;
49+   in_pos[packed_dim] =  pos[packed_dim] +  (src_offset[packed_dim] >>  2 );
5350
5451  //  Read input value mapping to this output texel
55-   const  VEC4_T in_value =  load_texel_lpos(t_in, in_pos, in_axis_map);
52+   VEC4_T in_value =  load_texel_lpos(t_in, in_pos, in_axis_map);
53+ 
54+   //  Starting offset to read from a texel
55+   const  int  src_lane_offset =  src_offset[packed_dim] &  0x3;
56+   const  bool  has_src_lane_offset =  src_lane_offset !=  0 ;
57+ 
58+   //  If the input lane offset is non-zero, i.e. the packed texel is composed from multiple sources
59+   if  (has_src_lane_offset) {
60+     //  Boundary values will come from next input texel in the packed dim.
61+     ivec3  next_in_pos =  in_pos;
62+     next_in_pos[packed_dim] =  in_pos[packed_dim] +  1 ;
63+     VEC4_T next_value =  load_texel_lpos(t_in, next_in_pos, in_axis_map);
64+ 
65+     //  Keep input values from the end of the current input texel, based on src_lane_offset
66+     //  offset 1 means the first lane of the current input texel is not a part of the output texel
67+     //  offset 2 means the first 2 lanes are not, and so on
68+     if  (src_lane_offset ==  1 ) {
69+       in_value.xyz =  in_value.yzw;
70+     } else  if  (src_lane_offset ==  2 ) {
71+       in_value.xy =  in_value.zw;
72+     } else  {
73+       in_value.x =  in_value.w;
74+     }
75+     //  Copy the next texel's values towards the end of the input texel, based on src_lane_offset
76+     //  offset 1 means the first lane from the next texel is part of the input texel
77+     //  offset 2 means the first 2 lanes from the next texel are part of the input texel, and so on
78+     if  (src_lane_offset ==  1 ) {
79+       in_value.w =  next_value.x;
80+     } else  if  (src_lane_offset ==  2 ) {
81+       in_value.zw =  next_value.xy;
82+     } else  {
83+       in_value.yzw =  next_value.xyz;
84+     }
85+   }
86+ 
87+   //  Starting offset to write at within a texel
88+   const  int  out_lane_offset =  dst_offset[packed_dim] &  0x3;
89+   const  bool  has_dst_lane_offset =  out_lane_offset !=  0 ;
5690
5791  ivec3  out_pos =  pos +  dst_offset.xyz;
5892  out_pos[packed_dim] =  pos[packed_dim] +  (dst_offset[packed_dim] >>  2 );
5993
6094  VEC4_T out_value;
6195
6296  //  If lane offset is non zero i.e packed texel is composed from multiple sources
63-   if  (has_lane_offset ) {
97+   if  (has_dst_lane_offset ) {
6498    //  When position in packed dim is > 0
6599    if  (pos[packed_dim] >  0 ) {
66100      //  Boundary values will come from previous input texel in the packed dim.
0 commit comments