@@ -28,39 +28,47 @@ void k2c_conv1d_transpose(k2c_tensor *output, const k2c_tensor *input,
2828
2929 const size_t ker_dim12 = n_channels * n_filters ;
3030
31- size_t cs = 0 ;
32- size_t ce = 0 ;
33- size_t ts = 0 ;
34- size_t ks = 0 ;
31+ // changed some names for refactor clarity
32+ size_t output_start_idx = 0 ; // cs
33+ size_t output_end_idx = 0 ; // ce
34+ size_t output_raw_idx = 0 ; // ts
35+ size_t kernel_offset = 0 ; // ks
3536
3637 for (size_t f = 0 ; f < n_filters ; ++ f )
3738 {
3839 for (size_t ch = 0 ; ch < n_channels ; ++ ch )
3940 {
4041 for (size_t t = 0 ; t < n_height ; ++ t )
4142 {
42- ts = t * stride ;
43- if (ts > start_crop )
43+ output_raw_idx = t * stride ;
44+
45+ // start index
46+ if (output_raw_idx > start_crop )
4447 {
45- cs = ts - start_crop ;
48+ output_start_idx = output_raw_idx - start_crop ;
4649 }
4750 else
4851 {
49- cs = 0 ;
52+ output_start_idx = 0 ;
5053 }
51- if (ts + k_size - start_crop > out_height )
54+
55+ // end index
56+ if (output_raw_idx + k_size - start_crop > out_height )
5257 {
53- ce = out_height ;
58+ output_end_idx = out_height ;
5459 }
5560 else
5661 {
57- ce = ts + k_size - start_crop ;
62+ output_end_idx = output_raw_idx + k_size - start_crop ;
5863 }
59- ks = cs - (ts - start_crop );
60- for (size_t i = 0 ; i < ce - cs ; ++ i )
64+
65+ kernel_offset = output_start_idx - (output_raw_idx - start_crop );
66+
67+ // convolution
68+ for (size_t i = 0 ; i < output_end_idx - output_start_idx ; ++ i )
6169 {
62- output -> array [(i + cs ) * n_filters + f ] +=
63- kernel -> array [(i + ks ) * ker_dim12 + f * n_channels + ch ] *
70+ output -> array [(i + output_start_idx ) * n_filters + f ] +=
71+ kernel -> array [(i + kernel_offset ) * ker_dim12 + f * n_channels + ch ] *
6472 input -> array [t * n_channels + ch ];
6573 }
6674 }
@@ -71,3 +79,152 @@ void k2c_conv1d_transpose(k2c_tensor *output, const k2c_tensor *input,
7179 k2c_bias_add (output , bias );
7280 activation (output -> array , output -> numel );
7381}
82+
83+ /**
84+ * 2D Transposed Convolution (Deconvolution).
85+ * Assumes a "channels last" structure.
86+ *
87+ * :param output: output tensor.
88+ * :param input: input tensor.
89+ * :param kernel: kernel tensor.
90+ * :param bias: bias tensor.
91+ * :param stride: array[2] {stride_height, stride_width}.
92+ * :param dilation: array[2] {dilation_height, dilation_width}.
93+ * (Note: Logic below assumes dilation is 1 for the optimized bounds check).
94+ * :param padding: array[2] {crop_top, crop_left}.
95+ * Amount to crop from the output (inverse of padding).
96+ * :param activation: activation function to apply to output.
97+ */
98+ void k2c_conv2d_transpose (k2c_tensor * output , const k2c_tensor * input ,
99+ const k2c_tensor * kernel , const k2c_tensor * bias ,
100+ const size_t * stride , const size_t * dilation ,
101+ const size_t * padding , k2c_activationType * activation )
102+ {
103+ // Initialize output memory to zero
104+ memset (output -> array , 0 , output -> numel * sizeof (output -> array [0 ]));
105+
106+ // --- Dimensions ---
107+ const size_t in_rows = input -> shape [0 ];
108+ const size_t in_cols = input -> shape [1 ];
109+ const size_t in_channels = input -> shape [2 ];
110+
111+ // Kernel Shape: {Rows, Cols, InChannels, OutChannels} based on reference
112+ const size_t k_rows = kernel -> shape [0 ];
113+ const size_t k_cols = kernel -> shape [1 ];
114+ const size_t n_filters = kernel -> shape [3 ];
115+
116+ const size_t out_rows = output -> shape [0 ];
117+ const size_t out_cols = output -> shape [1 ];
118+
119+ // Access strides/padding from arrays
120+ const size_t stride_h = stride [0 ];
121+ const size_t stride_w = stride [1 ];
122+ const size_t crop_h = padding [0 ];
123+ const size_t crop_w = padding [1 ];
124+
125+ // Pre-calculate dimensional steps for Kernel
126+ // Kernel index math: z0 * (cols*in*out) + z1 * (in*out) + q * (out) + k
127+ // Note: This matches the "Out-Channel Last" memory layout of the reference.
128+ const size_t k_step_row = kernel -> shape [1 ] * kernel -> shape [2 ] * kernel -> shape [3 ];
129+ const size_t k_step_col = kernel -> shape [2 ] * kernel -> shape [3 ];
130+ const size_t k_step_in = kernel -> shape [3 ];
131+
132+ // --- Window Variables ---
133+ // Vertical (Rows)
134+ size_t row_raw_idx , row_start_idx , row_end_idx , row_ker_offset ;
135+ // Horizontal (Cols)
136+ size_t col_raw_idx , col_start_idx , col_end_idx , col_ker_offset ;
137+
138+ // Loop 1: Filters (Output Channels)
139+ for (size_t f = 0 ; f < n_filters ; ++ f )
140+ {
141+ // Loop 2: Input Channels
142+ for (size_t ch = 0 ; ch < in_channels ; ++ ch )
143+ {
144+ // Loop 3: Input Rows
145+ for (size_t r = 0 ; r < in_rows ; ++ r )
146+ {
147+ // === Vertical Bounds Calculation (Similar to 1D) ===
148+ row_raw_idx = r * stride_h ;
149+
150+ // Clamp Top
151+ if (row_raw_idx > crop_h )
152+ row_start_idx = row_raw_idx - crop_h ;
153+ else
154+ row_start_idx = 0 ;
155+
156+ // Clamp Bottom
157+ if (row_raw_idx + k_rows - crop_h > out_rows )
158+ row_end_idx = out_rows ;
159+ else
160+ row_end_idx = row_raw_idx + k_rows - crop_h ;
161+
162+ // Kernel Offset (Vertical)
163+ row_ker_offset = row_start_idx - (row_raw_idx - crop_h );
164+
165+
166+ // Loop 4: Input Columns
167+ for (size_t c = 0 ; c < in_cols ; ++ c )
168+ {
169+ // === Horizontal Bounds Calculation ===
170+ col_raw_idx = c * stride_w ;
171+
172+ // Clamp Left
173+ if (col_raw_idx > crop_w )
174+ col_start_idx = col_raw_idx - crop_w ;
175+ else
176+ col_start_idx = 0 ;
177+
178+ // Clamp Right
179+ if (col_raw_idx + k_cols - crop_w > out_cols )
180+ col_end_idx = out_cols ;
181+ else
182+ col_end_idx = col_raw_idx + k_cols - crop_w ;
183+
184+ // Kernel Offset (Horizontal)
185+ col_ker_offset = col_start_idx - (col_raw_idx - crop_w );
186+
187+ // Pre-calculate Input Value
188+ // Input Index: r * (cols*ch) + c * (ch) + ch
189+ float input_val = input -> array [r * (in_cols * in_channels ) + c * in_channels + ch ];
190+
191+ // === Inner Loops (Spatial Accumulation) ===
192+ // Iterating over the VALID intersection of kernel and output
193+ size_t valid_h = row_end_idx - row_start_idx ;
194+ size_t valid_w = col_end_idx - col_start_idx ;
195+
196+ for (size_t kr = 0 ; kr < valid_h ; ++ kr )
197+ {
198+ for (size_t kc = 0 ; kc < valid_w ; ++ kc )
199+ {
200+ // 1. Output Index
201+ // Row: (kr + row_start_idx)
202+ // Col: (kc + col_start_idx)
203+ // Channel: f
204+ size_t out_r = kr + row_start_idx ;
205+ size_t out_c = kc + col_start_idx ;
206+
207+ size_t out_idx = out_r * (out_cols * n_filters ) + out_c * n_filters + f ;
208+
209+ // 2. Kernel Index
210+ // Row: (kr + row_ker_offset)
211+ // Col: (kc + col_ker_offset)
212+ // InChannel: ch
213+ // OutChannel: f
214+ size_t k_r = kr + row_ker_offset ;
215+ size_t k_c = kc + col_ker_offset ;
216+
217+ size_t ker_idx = k_r * k_step_row + k_c * k_step_col + ch * k_step_in + f ;
218+
219+ // 3. Accumulate
220+ output -> array [out_idx ] += kernel -> array [ker_idx ] * input_val ;
221+ }
222+ }
223+ }
224+ }
225+ }
226+ }
227+
228+ k2c_bias_add (output , bias );
229+ activation (output -> array , output -> numel );
230+ }
0 commit comments