@@ -96,16 +96,26 @@ int main(int argc, char *argv[]) {
9696 auto in = vin.bit_cast_view <unsigned char , 8 , 32 >();
9797
9898 simd<unsigned char , 6 * 24 > vout;
99- auto out = vout.bit_cast_view <uchar, 6 , 24 >();
10099
101100 simd<float , 6 * 24 > vm;
102101 auto m = vm.bit_cast_view <float , 6 , 24 >();
103102
104103 uint h_pos = it.get_id (0 );
105104 uint v_pos = it.get_id (1 );
106105
107- in = media_block_load<unsigned char , 8 , 32 >(accInput, h_pos * 24 ,
108- v_pos * 6 );
106+ vin = media_block_load<unsigned char , 8 , 32 >(accInput, h_pos * 24 ,
107+ v_pos * 6 );
108+ if (h_pos == range_width - 1 ) {
109+ #pragma unroll
110+ for (int i = 0 ; i < 8 ; i++) {
111+ vin.select <4 , 1 >(i * 32 + 24 ) = vin.select <4 , 1 >(i * 32 + 20 );
112+ vin.select <4 , 1 >(i * 32 + 28 ) = vin.select <4 , 1 >(i * 32 + 20 );
113+ }
114+ }
115+ if (v_pos == range_height - 1 ) {
116+ vin.select <32 , 1 >(7 * 32 ) = vin.select <32 , 1 >(5 * 32 );
117+ vin.select <32 , 1 >(6 * 32 ) = vin.select <32 , 1 >(5 * 32 );
118+ }
109119
110120 m = in.select <6 , 1 , 24 , 1 >(1 , 3 );
111121 m += in.select <6 , 1 , 24 , 1 >(0 , 0 );
@@ -121,7 +131,7 @@ int main(int argc, char *argv[]) {
121131 vout = convert<unsigned char >(vm);
122132
123133 media_block_store<unsigned char , 6 , 24 >(accOutput, h_pos * 24 ,
124- v_pos * 6 , out );
134+ v_pos * 6 , vout );
125135 });
126136 });
127137 e.wait ();
0 commit comments