@@ -33,16 +33,6 @@ void add_copy_offset_node(
3333  add_dtype_suffix (kernel_name, *t_out);
3434  add_storage_type_suffix (kernel_name, *t_out);
3535
36-   const  struct  Block  final  {
37-     alignas (16 ) ivec3 range;
38-     alignas (16 ) ivec3 src_offset;
39-     alignas (16 ) ivec3 dst_offset;
40-   } offset_params{
41-       range,
42-       src_offset,
43-       dst_offset,
44-   };
45- 
4636  auto  shader = VK_KERNEL_FROM_STR (kernel_name);
4737
4838  graph.execute_nodes ().emplace_back (new  DispatchNode (
@@ -56,11 +46,16 @@ void add_copy_offset_node(
5646          {in, vkapi::kRead },
5747      },
5848      //  Parameter buffers
59-       {
60-           graph.create_params_buffer (offset_params),
61-       },
49+       {},
6250      //  Specialization Constants
63-       {graph.hashed_layout_of (out), graph.hashed_layout_of (in)}));
51+       {graph.hashed_layout_of (out), graph.hashed_layout_of (in)},
52+       nullptr ,
53+       {},
54+       {
55+           PushConstantDataInfo (&range, sizeof (range), sizeof (utils::ivec4)),
56+           PushConstantDataInfo (&src_offset, sizeof (src_offset), sizeof (utils::ivec4)),
57+           PushConstantDataInfo (&dst_offset, sizeof (dst_offset), sizeof (utils::ivec4)),
58+       }));
6459}
6560
6661void  add_copy_channel_offset_node (
@@ -128,28 +123,23 @@ void add_copy_channel_offset_node(
128123    //  The shader combines the global invocation id and the dst_offset to get
129124    //  the actual coordinate.
130125
131-     ivec3 dst_offset{
126+     const   ivec3 dst_offset{
132127        0 , 0 , dst_first_z + batch_idx * utils::div_up_4 (out_channels)};
133128
134-     uvec3 global_size{
129+     const   uvec3 global_size{
135130        utils::safe_downcast<uint32_t >(dim_at<kWidth4D >(in_sizes)),
136131        utils::safe_downcast<uint32_t >(dim_at<kHeight4D >(in_sizes)),
137132        utils::safe_downcast<uint32_t >(dst_last_z - dst_first_z + 1 )};
138-     uvec3 local_size = graph.create_local_wg_size (global_size);
139- 
140-     const  struct  Block  final  {
141-       ivec3 range;
142-       int32_t  channel_range;
143-       ivec3 dst_offset;
144-       int32_t  dst_channel_offset;
145-       int32_t  src_channel_offset;
146-     } channel_offset_params{
147-         utils::make_ivec3 (global_size),
148-         channel_range,
149-         dst_offset,
150-         dst_channel_offset,
151-         src_channel_offset,
152-     };
133+     const  uvec3 local_size = graph.create_local_wg_size (global_size);
134+ 
135+     const  utils::ivec4 range_params = {
136+         static_cast <int >(global_size[0 ]),
137+         static_cast <int >(global_size[1 ]),
138+         static_cast <int >(global_size[2 ]),
139+         channel_range};
140+ 
141+     const  utils::ivec4 offset_params = {
142+         dst_offset[0 ], dst_offset[1 ], dst_offset[2 ], dst_channel_offset};
153143
154144    auto  shader = VK_KERNEL_FROM_STR (kernel_name);
155145
@@ -165,13 +155,17 @@ void add_copy_channel_offset_node(
165155            {in, vkapi::MemoryAccessType::READ},
166156        },
167157        //  Parameter buffers
168-         {
169-             t_out->sizes_ubo (),
170-             t_in->sizes_ubo (),
171-             graph.create_params_buffer (channel_offset_params),
172-         },
158+         {},
173159        //  Specialization Constants
174-         {graph.hashed_layout_of (out), graph.hashed_layout_of (in)}));
160+         {graph.hashed_layout_of (out), graph.hashed_layout_of (in)},
161+         nullptr ,
162+         {},
163+         {graph.sizes_pc_of (out),
164+          graph.sizes_pc_of (in),
165+          PushConstantDataInfo (&range_params, sizeof (range_params)),
166+          PushConstantDataInfo (&offset_params, sizeof (offset_params)),
167+          PushConstantDataInfo (
168+              &src_channel_offset, sizeof (src_channel_offset))}));
175169  }
176170}
177171
0 commit comments