@@ -34,7 +34,7 @@ __device__ inline int calculate_input_coord(int out_coord, int kern_coord, int s
 
 // ───────────── Memory layout abstractions ─────────────
 
-struct WHCNLayout {
+struct whcn_layout {
     __device__ static int input_index(int n, int c, int y, int x, const conv_params & params) {
         return n * (params.channels * params.in_w * params.in_h) + c * params.in_w * params.in_h + y * params.in_w + x;
     }
@@ -57,7 +57,7 @@ struct WHCNLayout {
     }
 };
 
-struct CWHNLayout {
+struct cwhn_layout {
     __device__ static int input_index(int n, int c, int y, int x, const conv_params & params) {
         return n * (params.channels * params.in_w * params.in_h) + (y * params.in_w + x) * params.channels + c;
     }
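
The two structs differ only in which dimension moves fastest in memory. Below is a minimal host-side sketch of the same index arithmetic (the reduced `conv_params`, the free functions, and the driver are illustrative assumptions, not part of this patch), showing how one logical element maps to different flat offsets under each layout.

```cpp
// Standalone sketch of the two index mappings above; conv_params here is a
// reduced stand-in for the real struct, kept to the fields these formulas use.
#include <cstdio>

struct conv_params { int in_w, in_h, channels; };

// WHCN: x is the fastest-moving dimension; each channel is a contiguous plane.
static int whcn_input_index(int n, int c, int y, int x, const conv_params & p) {
    return n * (p.channels * p.in_w * p.in_h) + c * p.in_w * p.in_h + y * p.in_w + x;
}

// CWHN: c is the fastest-moving dimension; channels are interleaved per pixel.
static int cwhn_input_index(int n, int c, int y, int x, const conv_params & p) {
    return n * (p.channels * p.in_w * p.in_h) + (y * p.in_w + x) * p.channels + c;
}

int main() {
    const conv_params p = { /*in_w=*/4, /*in_h=*/3, /*channels=*/2 };
    // Same element (n=0, c=1, y=0, x=1), two different offsets:
    printf("WHCN offset: %d\n", whcn_input_index(0, 1, 0, 1, p)); // 1*12 + 0 + 1  = 13
    printf("CWHN offset: %d\n", cwhn_input_index(0, 1, 0, 1, p)); // (0*4+1)*2 + 1 = 3
    return 0;
}
```

Keeping neighbouring channels adjacent in memory is what typically lets the CWHN kernel read channel-contiguous data with coalesced accesses.
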
@@ -125,10 +125,10 @@ __global__ void conv2d_dw_whcn_kernel(const T * __restrict__ in, const T * __res
                            stride_y, padding_x, padding_y, dilation_x, dilation_y, channels, batches };
 
     int batch_idx, channel_idx, out_y_idx, out_x_idx;
-    WHCNLayout::unpack_indices(global_idx, params, batch_idx, channel_idx, out_y_idx, out_x_idx);
+    whcn_layout::unpack_indices(global_idx, params, batch_idx, channel_idx, out_y_idx, out_x_idx);
 
-    T result = compute_conv2d_dw_pixel<T, WHCNLayout>(in, kern, params, batch_idx, channel_idx, out_y_idx, out_x_idx);
-    out[WHCNLayout::output_index(batch_idx, channel_idx, out_y_idx, out_x_idx, params)] = result;
+    T result = compute_conv2d_dw_pixel<T, whcn_layout>(in, kern, params, batch_idx, channel_idx, out_y_idx, out_x_idx);
+    out[whcn_layout::output_index(batch_idx, channel_idx, out_y_idx, out_x_idx, params)] = result;
 }
 
 template <typename T>
@@ -148,11 +148,11 @@ __global__ void conv_2d_dw_cwhn_kernel(const T * __restrict__ in, const T * __re
                            stride_y, padding_x, padding_y, dilation_x, dilation_y, channels, batches };
 
     int batch_idx, channel_idx, out_y_idx, out_x_idx;
-    CWHNLayout::unpack_indices(global_idx, params, batch_idx, channel_idx, out_y_idx, out_x_idx);
+    cwhn_layout::unpack_indices(global_idx, params, batch_idx, channel_idx, out_y_idx, out_x_idx);
 
     const T result =
-        compute_conv2d_dw_pixel<T, CWHNLayout>(in, kern, params, batch_idx, channel_idx, out_y_idx, out_x_idx);
-    out[CWHNLayout::output_index(batch_idx, channel_idx, out_y_idx, out_x_idx, params)] = result;
+        compute_conv2d_dw_pixel<T, cwhn_layout>(in, kern, params, batch_idx, channel_idx, out_y_idx, out_x_idx);
+    out[cwhn_layout::output_index(batch_idx, channel_idx, out_y_idx, out_x_idx, params)] = result;
 }
 
 // ───────────── dispatcher ─────────────
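
Both kernels follow the same pattern: unpack the flat thread index into (batch, channel, y, x) coordinates for their layout, compute one output pixel, and write it through the layout's output_index. The body of unpack_indices is not shown in this diff; for a WHCN layout it would plausibly be the usual div/mod chain, peeling off the fastest-moving dimension first, as in this hypothetical sketch.

```cpp
// Hypothetical illustration only: the real whcn_layout::unpack_indices is not
// part of this diff. Assuming conv_params carries out_w, out_h and channels
// (as the kernel parameters above imply), a WHCN decomposition peels the
// fastest-moving dimension (x) off the flat index first.
__device__ static void unpack_indices_sketch(int global_idx, const conv_params & params,
                                             int & n, int & c, int & y, int & x) {
    x = global_idx % params.out_w;
    y = (global_idx / params.out_w) % params.out_h;
    c = (global_idx / (params.out_w * params.out_h)) % params.channels;
    n =  global_idx / (params.out_w * params.out_h * params.channels);
}
```
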
@@ -197,7 +197,6 @@ void ggml_cuda_op_conv2d_dw(ggml_backend_cuda_context & ctx, ggml_tensor * dst)
             x_d, w_d, y_d, in_w, in_h, out_w, out_h, kernel_w, kernel_h, stride_x, stride_y, padding_x, padding_y,
             dilation_x, dilation_y, channels, batches);
     } else {
-        // Unsupported memory layout
         GGML_ABORT("Unsupported memory layout for conv_2d_dw");
     }
 }
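
The else branch now aborts directly, without the comment that merely restated the message. How the dispatcher decides which kernel to launch lies outside this hunk; one plausible stride-based probe (the tensor_desc type and function names are assumptions for illustration, not ggml API) would test which dimension has unit element stride.

```cpp
// Hypothetical layout probe, not the actual ggml check: a tensor is
// channel-innermost (CWHN) when the channel stride equals the element size,
// and width-innermost (WHCN) when the x stride does.
#include <cstddef>

struct tensor_desc { size_t stride_x, stride_c, elem_size; };  // byte strides, assumed

static bool looks_like_cwhn(const tensor_desc & t) { return t.stride_c == t.elem_size; }
static bool looks_like_whcn(const tensor_desc & t) { return t.stride_x == t.elem_size; }
```
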