Skip to content

Commit f19183f

Browse files
committed
added LDGM
1 parent 793262d commit f19183f

26 files changed

+3149
-162
lines changed

ldgm-coding/coding-session.h

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
/*
2+
* =====================================================================================
3+
*
4+
* Filename: coding-session.h
5+
*
6+
* Description: This abstract class represents a general coding session object.
7+
* Every class implementing a particular coding method/algorithm
8+
* must inherit from this class.
9+
*
10+
* Author: Milan Kabat, [email protected]
11+
*
12+
* =====================================================================================
13+
*/
14+
15+
#ifndef CODING_SESSION
16+
#define CODING_SESSION
17+
18+
#include <map>
19+
20+
/** \class Coding_session
 *  \brief Abstract class Coding_session
 *
 *  This abstract class represents a general coding session object. Every class
 *  implementing a particular coding method/algorithm must inherit from this
 *  class.
 */
class Coding_session
{
    public:

        /** Virtual destructor: concrete sessions are used polymorphically
         *  through Coding_session*, and deleting a derived object through a
         *  base pointer without a virtual destructor is undefined behaviour. */
        virtual ~Coding_session () {}

        /**
         * Method encode should compute parity data for the given source data
         * and add a suitable FEC parity header.
         *
         * @param source_data Data to encode
         * @param frame_size  Size of the frame being encoded
         * @param buf_size    Output parameter for storing the total size of
         *                    the returned buffer
         * @return Computed parity data with appropriate FEC parity header
         */
        virtual char*
            encode_frame ( char* source_data, int frame_size, int* buf_size ) = 0;

        /**
         * Method decode should recover the original source data.
         *
         * @param received_data Received data (source and parity)
         * @param buf_size      Size of the received buffer
         * @param frame_size    Output parameter for storing the size of the
         *                      decoded frame
         * @param valid_data    Map storing pairs <offset, number of bytes>
         *                      of received data
         * @return Recovered source data
         */
        virtual char*
            decode_frame ( char* received_data, int buf_size, int* frame_size,
                    std::map<int,int> valid_data) = 0;
};
55+
56+
#endif
57+

ldgm-coding/gpu.cu

Lines changed: 295 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,295 @@
1+
/*
2+
* =====================================================================================
3+
*
4+
*       Filename:  gpu.cu
5+
*
6+
* Description: CUDA test
7+
*
8+
* Version: 1.0
9+
* Created: 02/06/2012 03:54:42 PM
10+
* Revision: none
11+
* Compiler: gcc
12+
*
13+
* Author: Milan Kabat (), [email protected]
14+
* Company: FI MUNI
15+
*
16+
* =====================================================================================
17+
*/
18+
19+
#include <stdio.h>
20+
#include <cuda.h>
21+
22+
#include "timer-util.h"
23+
#include "gpu.cuh"
24+
25+
#define INTS_PER_THREAD 4
26+
27+
void make_compact_pcm ( char* pc_matrix, int* pcm, struct coding_params params );
28+
29+
void gpu_encode ( char* source_data, char* parity, int* pc_matrix, struct coding_params );
30+
31+
void gpu_decode ( char* received, int* error_vec, char* pc_matrix, struct coding_params );
32+
33+
/**
 * Kernel computing parity packets for LDGM encoding.
 *
 * Launch layout (see gpu_encode): one block per parity packet
 * (gridDim.x == params.m), blockDim.x == (packet_size/sizeof(int))/INTS_PER_THREAD,
 * and params.packet_size bytes of dynamic shared memory per block.
 *
 * @param data   Source packets on the device (params.k packets)
 * @param parity Output parity packets on the device (params.m packets)
 * @param pcm    Compact parity-check matrix: params.max_row_weight+2 column
 *               indices per row, -1 marking unused slots
 * @param params Coding parameters
 */
__global__ void encode ( int* data, int* parity, int* pcm, struct coding_params params )
{
    // Partial parity result for this block's packet.
    extern __shared__ int shared_mem[];

    int num_packets = params.max_row_weight;
    int p_size = params.packet_size/4;      // packet size in ints

    // BUG FIX: shared memory is uninitialized on kernel entry.  The original
    // code XOR-accumulated into it first and zeroed it only after the
    // write-back, so the very accumulation it relied on started from garbage.
    // Each thread zeroes its own INTS_PER_THREAD slice before accumulating.
    for ( int j = 0; j < INTS_PER_THREAD; j++ )
        shared_mem [ threadIdx.x*INTS_PER_THREAD + j ] = 0;

    // In each iteration, XOR the referenced source packet's content into the
    // partial result held in shared memory.
    for ( int i = 0; i < num_packets; i++ )
    {
        int index = pcm [ blockIdx.x*(num_packets+2) + i ];

        // Only true source-packet indices participate here: -1 entries are
        // padding, and indices >= params.k belong to the staircase part.
        if ( index > -1 && index < params.k )
        {
            for ( int j = 0; j < INTS_PER_THREAD; j++ )
            {
                shared_mem [ threadIdx.x*INTS_PER_THREAD + j ] ^=
                    data [ index*p_size + threadIdx.x*INTS_PER_THREAD + j ];
            }
        }
    }

    // Write the finished parity packet back to global memory.  No barrier is
    // needed: every thread reads only the shared-memory slice it wrote itself.
    for ( int j = 0; j < INTS_PER_THREAD; j++ )
    {
        parity [ blockIdx.x*p_size + threadIdx.x*INTS_PER_THREAD + j ] =
            shared_mem [ threadIdx.x*INTS_PER_THREAD + j ];
    }
}
68+
69+
/*
 * Decoding kernel -- currently an empty stub.  The body below is the author's
 * commented-out work in progress, preserved verbatim.
 *
 * NOTE(review): the sketch references `num_beighbours` (typo for
 * `num_neighbours`) and an undeclared `pkts` array, so it would not compile
 * if uncommented as-is.
 */
__global__ void decode ( int* received, int* error_vec, int* pcm, struct coding_params params )
{
    /* extern __shared__ int shared_mem[];
     *
     * int num_neighbours = params.max_row_weight + 2;
     *
     * int p_size = params.packet_size/sizeof(int);
     * int num_threads = p_size/4;
     *
     * //load the neighbouring packets into shared memory
     * int idx = blockIdx.x*blockDim.x + threadIdx.x;
     * for ( int i = 0; i < num_beighbours; i++ )
     * {
     *     if ( threadIdx.x == 0 )
     *         shared_mem[i] = pcm [ blockIdx.x*num_beighbours + i ];
     *
     *     __syncthreads();
     *
     *     if ( shared_mem[i] != -1 )
     *     {
     * //      shared_mem [ num_neighbours + threadIdx.x*4 + i ] =
     *
     *     }
     * }
     *
     * __syncthreads();
     * for ( int i = 0; i < (params.packet_size/sizeof(int))/num_threads; i++ )
     *     received [ (params.k + blockIdx.x)*p_size + threadIdx.x*4 + i] =
     *         pkts [ idx + i ];
     *
     * __syncthreads();
     */

    // for ( int i = 0; i < params.packet_size/sizeof(int); i++ )
    //     received [ params.k*p_size + blockIdx.x*p_size + idx + i ] = pkts [ idx + i ];

}
111+
112+
/**
 * Host-side LDGM encoding driver: uploads the source packets and the compact
 * parity-check matrix to the device, launches the encode kernel (one block
 * per parity packet) and downloads the computed parity packets.
 *
 * @param source_data Host buffer holding params.k source packets
 * @param parity      Host output buffer for params.m parity packets
 * @param pcm         Compact parity-check matrix,
 *                    params.m * (params.max_row_weight+2) ints
 * @param params      Coding parameters
 */
void gpu_encode ( char* source_data, char* parity, int* pcm, struct coding_params params )
{
    int* src_data_d;
    int* parity_data_d;
    int* pcm_d;
    short show_info = 0;    // set to 1 to dump device properties

    cudaError_t cuda_error;

    struct cudaDeviceProp dev_prop;

    cudaGetDeviceProperties (&dev_prop, 0);

    if (show_info)
    {
        if (!dev_prop.canMapHostMemory)
            printf("Cannot map host memory.\n");
        printf ( "name: %s\n", dev_prop.name );
        printf ( "totalGlobalMem: %d MB\n", (unsigned int)dev_prop.totalGlobalMem/(1024*1024) );
        printf ( "sharedMemPerBlock: %d kB\n", (unsigned int)dev_prop.sharedMemPerBlock/1024 );
        printf ( "maxThreadsPerBlock: %d\n", dev_prop.maxThreadsPerBlock );
        printf ( "maxThreadsDim: %d\n", dev_prop.maxThreadsDim[0] );
        printf ( "maxThreadsDim: %d\n", dev_prop.maxThreadsDim[1] );
        printf ( "maxThreadsDim: %d\n", dev_prop.maxThreadsDim[2] );
        printf ( "maxGridSize: %d\n", dev_prop.maxGridSize[0] );
    }

    // Allocate device buffers for source data, parity output and the PCM.
    cuda_error = cudaMalloc ( (void**) &src_data_d, params.k*params.packet_size);
    if ( cuda_error != cudaSuccess )
        printf ( "cudaMalloc returned %s\n", cudaGetErrorString(cuda_error) );

    cuda_error = cudaMalloc ( (void**) &parity_data_d, params.m*params.packet_size);
    if ( cuda_error != cudaSuccess )
        printf ( "cudaMalloc returned %s\n", cudaGetErrorString(cuda_error) );

    cuda_error = cudaMemset ( parity_data_d, 0, params.m*params.packet_size);
    if ( cuda_error != cudaSuccess )
        printf ( "cudaMemset returned %s\n", cudaGetErrorString(cuda_error) );

    cuda_error = cudaMemcpy ( src_data_d, source_data, params.k*params.packet_size,
            cudaMemcpyHostToDevice );
    if ( cuda_error != cudaSuccess )
        printf ( "cudaMemcpy returned %s\n", cudaGetErrorString(cuda_error) );

    cuda_error = cudaMalloc ( (void**) &pcm_d, params.m*(params.max_row_weight+2)*sizeof(int));
    if ( cuda_error != cudaSuccess )
        printf ( "cudaMalloc returned %s\n", cudaGetErrorString(cuda_error) );

    cuda_error = cudaMemcpy ( pcm_d, pcm, sizeof(int)*params.m*(params.max_row_weight+2),
            cudaMemcpyHostToDevice );
    if ( cuda_error != cudaSuccess )
        printf ( "cudaMemcpy returned %s\n", cudaGetErrorString(cuda_error) );

    cuda_error = cudaDeviceSynchronize();
    if ( cuda_error != cudaSuccess )
        printf ( "cudaDeviceSynchronize returned %s\n", cudaGetErrorString(cuda_error) );

    // One block per parity packet; each thread produces INTS_PER_THREAD ints,
    // with one packet's worth of dynamic shared memory as the partial result.
    int block_size = (params.packet_size / sizeof(int))/INTS_PER_THREAD;
    int block_count = params.m;

    int num_bytes_shared = params.packet_size;

    encode <<< block_count, block_size, num_bytes_shared >>> (src_data_d, parity_data_d,
            pcm_d, params );

    // Launch-configuration errors surface here, not from the launch itself.
    cuda_error = cudaGetLastError();
    if ( cuda_error != cudaSuccess )
        printf("kernel execution returned %s\n", cudaGetErrorString(cuda_error));

    // cudaThreadSynchronize() is deprecated; cudaDeviceSynchronize() is the
    // current equivalent.
    cudaDeviceSynchronize();

    cudaMemcpy ( parity, parity_data_d, params.m*params.packet_size, cudaMemcpyDeviceToHost );
    cuda_error = cudaGetLastError();
    if ( cuda_error != cudaSuccess )
        printf("cudaMemcpy from device returned %s\n", cudaGetErrorString(cuda_error));

    // Release all device buffers.  The original leaked pcm_d.
    cudaFree(src_data_d);
    cudaFree(parity_data_d);
    cudaFree(pcm_d);
}
200+
201+
/*
 * Host-side decoding entry point -- currently an empty stub.  The
 * commented-out body below is the author's work in progress, preserved
 * verbatim.
 *
 * NOTE(review): in the sketch, the memcpy logged as "cudaMempcy pcm" copies
 * `pcm` into `pcm_d` a second time instead of filling `error_vec_d`, and only
 * `received_d` is freed -- both need fixing before uncommenting.
 */
void gpu_decode ( char* received, int* error_vec, char* pc_matrix, struct coding_params params )
{
    /* int* received_d;
     * int* pcm_d;
     * int* error_vec_d;
     * cudaError_t cuda_error;
     *
     * int k = params.k;
     * int m = params.m;
     * int packet_size = params.packet_size;
     *
     * int **pcm = make_compact_pcm ( pc_matrix, params );
     *
     * //allocate space and copy data to device
     * cuda_error = cudaMalloc ( (void**) &received_d, (k+m)*packet_size);
     * if ( cuda_error != cudaSuccess )
     *     printf ( "cudaMalloc return %d\n", cuda_error );
     *
     * cuda_error = cudaMemcpy ( received_d, received, (k+m)*packet_size, cudaMemcpyHostToDevice );
     * if ( cuda_error != cudaSuccess )
     *     printf ( "cudaMempcy return %d\n", cuda_error );
     *
     * cuda_error = cudaMalloc ( (void**) &pcm_d, m*params.max_row_weight*sizeof(int));
     * if ( cuda_error != cudaSuccess )
     *     printf ( "cudaMalloc return %d\n", cuda_error );
     *
     * cuda_error = cudaMemcpy ( pcm_d, pcm, sizeof(int)*m*params.max_row_weight,
     *         cudaMemcpyHostToDevice );
     * if ( cuda_error != cudaSuccess )
     *     printf ( "cudaMempcy return %d\n", cuda_error );
     *
     * cuda_error = cudaMalloc ( (void**) &error_vec_d, params.num_lost*sizeof(int));
     * if ( cuda_error != cudaSuccess )
     *     printf ( "cudaMalloc return %d\n", cuda_error );
     *
     * cuda_error = cudaMemcpy ( pcm_d, pcm, params.num_lost*sizeof(int),
     *         cudaMemcpyHostToDevice );
     * if ( cuda_error != cudaSuccess )
     *     printf ( "cudaMempcy pcm return %d\n", cuda_error );
     *
     * int block_size = (packet_size/sizeof(int)) / 4;
     * int block_count = m;
     * int shared_mem_size = (packet_size + sizeof(int))*(params.max_row_weight+2);
     *
     * decode <<< block_count, block_size, shared_mem_size >>> (received_d, error_vec, pcm_d, params );
     *
     * cuda_error = cudaMemcpy ( received, received_d, (k+m)*packet_size, cudaMemcpyDeviceToHost );
     * if ( cuda_error != cudaSuccess )
     *     printf ( "cudaMempcy from device return %d\n", cuda_error );
     *
     * cudaFree ( received_d );
     */
}
254+
255+
/* void make_compact_pcm ( char* pc_matrix, int* pcm, struct coding_params params)
256+
* {
257+
* //we need to create a compact representation of sparse pc_matrix
258+
*
259+
* int counter = 0;
260+
* int columns = params.max_row_weight + 2;
261+
*
262+
* for ( int i = 0; i < params.m; i++) {
263+
* for ( int j = 0; j < params.k; j++)
264+
* if ( pc_matrix[i*params.k + j] )
265+
* {
266+
* pcm[i*columns + counter] = j;
267+
* counter++;
268+
* }
269+
* //add indices from staircase matrix
270+
* pcm[i*columns + counter] = params.k + i;
271+
* counter++;
272+
*
273+
* if ( i > 0 )
274+
* {
275+
* pcm[i*columns + counter] = params.k + i - 1;
276+
* counter++;
277+
* }
278+
*
279+
* if ( counter < columns )
280+
* for ( int j = counter; j < columns; j++)
281+
* pcm[i*columns + j] = -1;
282+
* counter = 0;
283+
* }
284+
*
285+
*
286+
* for ( int i = 0; i < params.m; i++)
287+
* {
288+
* for ( int j = 0; j < columns; j++ )
289+
* printf ( "%d, ", pcm[i*columns + j] );
290+
* printf ( "\n" );
291+
* }
292+
*
293+
*
294+
* }
295+
*/

ldgm-coding/gpu.cuh

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
2+
void test(void);
3+
4+
5+
/** Parameters describing one LDGM coding configuration (shared by the
 *  host drivers and the device kernels in gpu.cu). */
struct coding_params {
    int num_lost;        /* number of lost packets -- presumably sized for the
                          * decoder's error vector (see gpu_decode sketch);
                          * TODO confirm */
    int k;               /* number of source packets/symbols in the block */
    int m;               /* number of parity packets/symbols in the block */
    int packet_size;     /* packet size in bytes (buffers are allocated as
                          * k*packet_size and m*packet_size bytes) */
    int max_row_weight;  /* max source-symbol entries per parity-check row;
                          * compact PCM rows hold max_row_weight+2 indices */
};
12+
13+
void gpu_encode ( char*, char*, int*, struct coding_params );
14+
15+
void gpu_decode ( char*, int*, char*, struct coding_params );
16+

ldgm-coding/ldgm-fec-header.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/*
2+
* =====================================================================================
3+
*
4+
* Filename: fec-header.h
5+
*
6+
* Description: LDGM FEC source header and FEC parity header definition
7+
*
8+
* Version: 1.0
9+
* Created: 04/16/2012 06:55:04 PM
10+
* Revision: none
11+
* Compiler: gcc
12+
*
13+
* Author: Milan Kabat (), [email protected]
14+
* Organization:
15+
*
16+
* =====================================================================================
17+
*/
18+
19+
/* uint16_t: the original header used it without including <stdint.h>,
 * silently relying on every includer to provide it first. */
#include <stdint.h>

/** Structure defining fields in LDGM FEC block header.
 *  Note: member comments use Doxygen's trailing marker to document the member
 *  they follow; the original plain comments would have attached to the NEXT
 *  member instead. */
typedef struct {
    uint16_t sbn; /**< Source block number */
    uint16_t esi; /**< Sequence number of this data block in
                   *   the encoding block */
    uint16_t k;   /**< Number of source symbols in the encoding
                   *   block */
    uint16_t m;   /**< Number of parity symbols in the encoding
                   *   block */
} fec_header_t;

0 commit comments

Comments
 (0)