Skip to content

Commit dc8cf68

Browse files
Transp2d + Transp1d refactor + minor syntax updates (#36)
* initial definitions * pytest, minor stylistic refactors
1 parent c506e1f commit dc8cf68

File tree

5 files changed

+1589
-729
lines changed

5 files changed

+1589
-729
lines changed

include/k2c_conv_transpose_layer.c

Lines changed: 172 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -28,39 +28,47 @@ void k2c_conv1d_transpose(k2c_tensor *output, const k2c_tensor *input,
2828

2929
const size_t ker_dim12 = n_channels * n_filters;
3030

31-
size_t cs = 0;
32-
size_t ce = 0;
33-
size_t ts = 0;
34-
size_t ks = 0;
31+
// changed some names for refactor clarity
32+
size_t output_start_idx = 0; // cs
33+
size_t output_end_idx = 0; // ce
34+
size_t output_raw_idx = 0; // ts
35+
size_t kernel_offset = 0; // ks
3536

3637
for (size_t f = 0; f < n_filters; ++f)
3738
{
3839
for (size_t ch = 0; ch < n_channels; ++ch)
3940
{
4041
for (size_t t = 0; t < n_height; ++t)
4142
{
42-
ts = t * stride;
43-
if (ts > start_crop)
43+
output_raw_idx = t * stride;
44+
45+
// start index
46+
if (output_raw_idx > start_crop)
4447
{
45-
cs = ts - start_crop;
48+
output_start_idx = output_raw_idx - start_crop;
4649
}
4750
else
4851
{
49-
cs = 0;
52+
output_start_idx = 0;
5053
}
51-
if (ts + k_size - start_crop > out_height)
54+
55+
// end index
56+
if (output_raw_idx + k_size - start_crop > out_height)
5257
{
53-
ce = out_height;
58+
output_end_idx = out_height;
5459
}
5560
else
5661
{
57-
ce = ts + k_size - start_crop;
62+
output_end_idx = output_raw_idx + k_size - start_crop;
5863
}
59-
ks = cs - (ts - start_crop);
60-
for (size_t i = 0; i < ce - cs; ++i)
64+
65+
kernel_offset = output_start_idx - (output_raw_idx - start_crop);
66+
67+
// convolution
68+
for (size_t i = 0; i < output_end_idx - output_start_idx; ++i)
6169
{
62-
output->array[(i + cs) * n_filters + f] +=
63-
kernel->array[(i + ks) * ker_dim12 + f * n_channels + ch] *
70+
output->array[(i + output_start_idx) * n_filters + f] +=
71+
kernel->array[(i + kernel_offset) * ker_dim12 + f * n_channels + ch] *
6472
input->array[t * n_channels + ch];
6573
}
6674
}
@@ -71,3 +79,152 @@ void k2c_conv1d_transpose(k2c_tensor *output, const k2c_tensor *input,
7179
k2c_bias_add(output, bias);
7280
activation(output->array, output->numel);
7381
}
82+
83+
/**
84+
* 2D Transposed Convolution (Deconvolution).
85+
* Assumes a "channels last" structure.
86+
*
87+
* :param output: output tensor.
88+
* :param input: input tensor.
89+
* :param kernel: kernel tensor.
90+
* :param bias: bias tensor.
91+
* :param stride: array[2] {stride_height, stride_width}.
92+
* :param dilation: array[2] {dilation_height, dilation_width}.
93+
* (Note: Logic below assumes dilation is 1 for the optimized bounds check).
94+
* :param padding: array[2] {crop_top, crop_left}.
95+
* Amount to crop from the output (inverse of padding).
96+
* :param activation: activation function to apply to output.
97+
*/
98+
void k2c_conv2d_transpose(k2c_tensor *output, const k2c_tensor *input,
99+
const k2c_tensor *kernel, const k2c_tensor *bias,
100+
const size_t *stride, const size_t *dilation,
101+
const size_t *padding, k2c_activationType *activation)
102+
{
103+
// Initialize output memory to zero
104+
memset(output->array, 0, output->numel * sizeof(output->array[0]));
105+
106+
// --- Dimensions ---
107+
const size_t in_rows = input->shape[0];
108+
const size_t in_cols = input->shape[1];
109+
const size_t in_channels = input->shape[2];
110+
111+
// Kernel Shape: {Rows, Cols, InChannels, OutChannels} based on reference
112+
const size_t k_rows = kernel->shape[0];
113+
const size_t k_cols = kernel->shape[1];
114+
const size_t n_filters = kernel->shape[3];
115+
116+
const size_t out_rows = output->shape[0];
117+
const size_t out_cols = output->shape[1];
118+
119+
// Access strides/padding from arrays
120+
const size_t stride_h = stride[0];
121+
const size_t stride_w = stride[1];
122+
const size_t crop_h = padding[0];
123+
const size_t crop_w = padding[1];
124+
125+
// Pre-calculate dimensional steps for Kernel
126+
// Kernel index math: z0 * (cols*in*out) + z1 * (in*out) + q * (out) + k
127+
// Note: This matches the "Out-Channel Last" memory layout of the reference.
128+
const size_t k_step_row = kernel->shape[1] * kernel->shape[2] * kernel->shape[3];
129+
const size_t k_step_col = kernel->shape[2] * kernel->shape[3];
130+
const size_t k_step_in = kernel->shape[3];
131+
132+
// --- Window Variables ---
133+
// Vertical (Rows)
134+
size_t row_raw_idx, row_start_idx, row_end_idx, row_ker_offset;
135+
// Horizontal (Cols)
136+
size_t col_raw_idx, col_start_idx, col_end_idx, col_ker_offset;
137+
138+
// Loop 1: Filters (Output Channels)
139+
for (size_t f = 0; f < n_filters; ++f)
140+
{
141+
// Loop 2: Input Channels
142+
for (size_t ch = 0; ch < in_channels; ++ch)
143+
{
144+
// Loop 3: Input Rows
145+
for (size_t r = 0; r < in_rows; ++r)
146+
{
147+
// === Vertical Bounds Calculation (Similar to 1D) ===
148+
row_raw_idx = r * stride_h;
149+
150+
// Clamp Top
151+
if (row_raw_idx > crop_h)
152+
row_start_idx = row_raw_idx - crop_h;
153+
else
154+
row_start_idx = 0;
155+
156+
// Clamp Bottom
157+
if (row_raw_idx + k_rows - crop_h > out_rows)
158+
row_end_idx = out_rows;
159+
else
160+
row_end_idx = row_raw_idx + k_rows - crop_h;
161+
162+
// Kernel Offset (Vertical)
163+
row_ker_offset = row_start_idx - (row_raw_idx - crop_h);
164+
165+
166+
// Loop 4: Input Columns
167+
for (size_t c = 0; c < in_cols; ++c)
168+
{
169+
// === Horizontal Bounds Calculation ===
170+
col_raw_idx = c * stride_w;
171+
172+
// Clamp Left
173+
if (col_raw_idx > crop_w)
174+
col_start_idx = col_raw_idx - crop_w;
175+
else
176+
col_start_idx = 0;
177+
178+
// Clamp Right
179+
if (col_raw_idx + k_cols - crop_w > out_cols)
180+
col_end_idx = out_cols;
181+
else
182+
col_end_idx = col_raw_idx + k_cols - crop_w;
183+
184+
// Kernel Offset (Horizontal)
185+
col_ker_offset = col_start_idx - (col_raw_idx - crop_w);
186+
187+
// Pre-calculate Input Value
188+
// Input Index: r * (cols*ch) + c * (ch) + ch
189+
float input_val = input->array[r * (in_cols * in_channels) + c * in_channels + ch];
190+
191+
// === Inner Loops (Spatial Accumulation) ===
192+
// Iterating over the VALID intersection of kernel and output
193+
size_t valid_h = row_end_idx - row_start_idx;
194+
size_t valid_w = col_end_idx - col_start_idx;
195+
196+
for (size_t kr = 0; kr < valid_h; ++kr)
197+
{
198+
for (size_t kc = 0; kc < valid_w; ++kc)
199+
{
200+
// 1. Output Index
201+
// Row: (kr + row_start_idx)
202+
// Col: (kc + col_start_idx)
203+
// Channel: f
204+
size_t out_r = kr + row_start_idx;
205+
size_t out_c = kc + col_start_idx;
206+
207+
size_t out_idx = out_r * (out_cols * n_filters) + out_c * n_filters + f;
208+
209+
// 2. Kernel Index
210+
// Row: (kr + row_ker_offset)
211+
// Col: (kc + col_ker_offset)
212+
// InChannel: ch
213+
// OutChannel: f
214+
size_t k_r = kr + row_ker_offset;
215+
size_t k_c = kc + col_ker_offset;
216+
217+
size_t ker_idx = k_r * k_step_row + k_c * k_step_col + ch * k_step_in + f;
218+
219+
// 3. Accumulate
220+
output->array[out_idx] += kernel->array[ker_idx] * input_val;
221+
}
222+
}
223+
}
224+
}
225+
}
226+
}
227+
228+
k2c_bias_add(output, bias);
229+
activation(output->array, output->numel);
230+
}

include/k2c_include.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,9 @@ void k2c_upsampling3d(k2c_tensor *output, const k2c_tensor *input, const size_t
6868
void k2c_conv1d_transpose(k2c_tensor *output, const k2c_tensor *input, const k2c_tensor *kernel,
6969
const k2c_tensor *bias, const size_t stride, const size_t start_crop,
7070
k2c_activationType *activation);
71+
void k2c_conv2d_transpose(k2c_tensor *output, const k2c_tensor *input, const k2c_tensor *kernel,
72+
const k2c_tensor *bias, const size_t *stride, const size_t *dilation,
73+
const size_t *padding, k2c_activationType *activation);
7174

7275
// Core Layers
7376
void k2c_dense(k2c_tensor *output, const k2c_tensor *input, const k2c_tensor *kernel,

0 commit comments

Comments
 (0)