@@ -71,21 +71,17 @@ void add_copy_packed_dim_offset_node(
7171    const  ivec3& range,
7272    const  ivec4& src_offset,
7373    const  ivec4& dst_offset,
74-     const  ValueRef out,
75-     bool  repeat) {
74+     const  ValueRef out) {
7675  vTensorPtr t_in = graph.get_tensor (in);
7776  vTensorPtr t_out = graph.get_tensor (out);
7877
79-   //  Check the packed dimension is same for both tensors
80-   VK_CHECK_COND (check_same_packed_dim (*t_in, *t_out));
81-   if  (!repeat) {
82-     //  For non repeat copy also check if the packed dimension is Width or
83-     //  Height. Since the function does not support channel packing.
84-     VK_CHECK_COND (
85-         check_same_packed_dim (*t_in, *t_out) &&
86-         (check_packed_dim_is (*t_in, WHCN::kWidthDim ) ||
87-          check_packed_dim_is (*t_in, WHCN::kHeightDim )));
88-   }
78+   //  Check that the packed dimension is the same for both tensors, and that
79+   //  it is Width or Height, since this function does not support channel
80+   //  packing.
81+   VK_CHECK_COND (
82+       check_same_packed_dim (*t_in, *t_out) &&
83+       (check_packed_dim_is (*t_in, WHCN::kWidthDim ) ||
84+        check_packed_dim_is (*t_in, WHCN::kHeightDim )));
8985
9086  std::string kernel_name = " copy_packed_dim_offset" 
9187  kernel_name.reserve (kShaderNameReserve );
@@ -96,43 +92,41 @@ void add_copy_packed_dim_offset_node(
9692      range[0 ], range[1 ], range[2 ], dim_at (t_in->sizes (), kBatch4D )};
9793  ivec3 global_wg_size = t_out->logical_limits ();
9894
99-   if  (!repeat) {
100-     const  auto  packed_dim = t_in->packed_dim ();
101-     //  The starting offset in a texel where this tensor will start copying from
102-     const  auto  src_lane_offset = src_offset[packed_dim] & 0x3 ;
103-     //  The starting offset in a texel where this tensor will start copying to
104-     const  auto  dst_lane_offset = dst_offset[packed_dim] & 0x3 ;
105- 
106-     //  The total packed texels this tensor will be copied from
107-     //  The first texel of tensor data in packed dimension will be copied from
108-     //  remaining lanes from current source Hence (4 - src_lane_offset) is added
109-     //  to tensor size in packed dimension
110-     const  auto  src_packed_size = utils::div_up_4 (
111-         (4  - src_lane_offset) +
112-         dim_at (t_out->sizes (), normalize_to_dim_index (*t_out, packed_dim)));
113- 
114-     //  The total packed texels this tensor will be copied to
115-     //  The first texel of tensor data in packed dimension will be copied to
116-     //  remaining lanes from previous write Hence (4 - dst_lane_offset) is added
117-     //  to tensor size in packed dimension
118-     const  auto  dst_packed_size = utils::div_up_4 (
119-         (4  - dst_lane_offset) +
120-         dim_at (t_in->sizes (), normalize_to_dim_index (*t_in, packed_dim)));
121- 
122-     //  If the starting src offset is not 0, and the total packed texels is
123-     //  greater than the source texel range
124-     const  bool  has_additional_src_work =
125-         src_lane_offset != 0  && src_packed_size > final_range[packed_dim];
126-     //  If the starting dst offset is not 0, and the total packed texels is
127-     //  greater than the source texel range
128-     const  bool  has_additional_dst_work =
129-         dst_lane_offset != 0  && dst_packed_size > final_range[packed_dim];
130- 
131-     if  (has_additional_src_work || has_additional_dst_work) {
132-       global_wg_size[packed_dim]++; //  Increase the global work group size in
133-                                     //  packed dimension
134-       final_range[packed_dim]++; //  Increase the range in packed dimension
135-     }
95+   const  auto  packed_dim = t_in->packed_dim ();
96+   //  The starting offset in a texel where this tensor will start copying from
97+   const  auto  src_lane_offset = src_offset[packed_dim] & 0x3 ;
98+   //  The starting offset in a texel where this tensor will start copying to
99+   const  auto  dst_lane_offset = dst_offset[packed_dim] & 0x3 ;
100+ 
101+   //  The total packed texels this tensor will be copied from
102+   //  The first texel of tensor data in packed dimension will be copied from
103+   //  remaining lanes from current source. Hence (4 - src_lane_offset) is added
104+   //  to tensor size in packed dimension
105+   const  auto  src_packed_size = utils::div_up_4 (
106+       (4  - src_lane_offset) +
107+       dim_at (t_out->sizes (), normalize_to_dim_index (*t_out, packed_dim)));
108+ 
109+   //  The total packed texels this tensor will be copied to
110+   //  The first texel of tensor data in packed dimension will be copied to
111+   //  remaining lanes from previous write. Hence (4 - dst_lane_offset) is added
112+   //  to tensor size in packed dimension
113+   const  auto  dst_packed_size = utils::div_up_4 (
114+       (4  - dst_lane_offset) +
115+       dim_at (t_in->sizes (), normalize_to_dim_index (*t_in, packed_dim)));
116+ 
117+   //  If the starting src offset is not 0, and the total packed texels is
118+   //  greater than the source texel range
119+   const  bool  has_additional_src_work =
120+       src_lane_offset != 0  && src_packed_size > final_range[packed_dim];
121+   //  If the starting dst offset is not 0, and the total packed texels is
122+   //  greater than the source texel range
123+   const  bool  has_additional_dst_work =
124+       dst_lane_offset != 0  && dst_packed_size > final_range[packed_dim];
125+ 
126+   if  (has_additional_src_work || has_additional_dst_work) {
127+     global_wg_size[packed_dim]++; //  Increase the global work group size in
128+                                   //  packed dimension
129+     final_range[packed_dim]++; //  Increase the range in packed dimension
136130  }
137131
138132  auto  shader = VK_KERNEL_FROM_STR (kernel_name);
@@ -151,7 +145,7 @@ void add_copy_packed_dim_offset_node(
151145      //  Parameter buffers
152146      {},
153147      //  Specialization Constants
154-       {graph.hashed_layout_of (out), graph.hashed_layout_of (in), repeat },
148+       {graph.hashed_layout_of (out), graph.hashed_layout_of (in)},
155149      nullptr ,
156150      {},
157151      {
0 commit comments