|
| 1 | +/* |
| 2 | + * ===================================================================================== |
| 3 | + * |
| 4 | + * Filename: hello.cu |
| 5 | + * |
| 6 | + * Description: CUDA test |
| 7 | + * |
| 8 | + * Version: 1.0 |
| 9 | + * Created: 02/06/2012 03:54:42 PM |
| 10 | + * Revision: none |
| 11 | + * Compiler: gcc |
| 12 | + * |
| 13 | + * Author: Milan Kabat (), [email protected] |
| 14 | + * Company: FI MUNI |
| 15 | + * |
| 16 | + * ===================================================================================== |
| 17 | + */ |
| 18 | + |
| 19 | +#include <stdio.h> |
| 20 | +#include <cuda.h> |
| 21 | + |
| 22 | +#include "timer-util.h" |
| 23 | +#include "gpu.cuh" |
| 24 | + |
| 25 | +#define INTS_PER_THREAD 4 |
| 26 | + |
| 27 | +void make_compact_pcm ( char* pc_matrix, int* pcm, struct coding_params params ); |
| 28 | + |
| 29 | +void gpu_encode ( char* source_data, char* parity, int* pc_matrix, struct coding_params ); |
| 30 | + |
| 31 | +void gpu_decode ( char* received, int* error_vec, char* pc_matrix, struct coding_params ); |
| 32 | + |
/*
 * Encoding kernel: computes one parity packet per block.
 *
 * Launch layout (see gpu_encode):
 *   gridDim.x  = params.m                 -- one block per parity packet
 *   blockDim.x = packet_size/sizeof(int)/INTS_PER_THREAD
 *   dynamic shared memory = params.packet_size bytes
 *
 * pcm is the compact parity-check matrix: params.m rows of
 * (params.max_row_weight + 2) source-packet indices, -1 = unused slot.
 * Each thread XOR-accumulates INTS_PER_THREAD ints of every referenced
 * source packet into its private slice of shared memory, then writes the
 * result out as this block's parity packet.
 */
__global__ void encode ( int* data, int* parity, int* pcm, struct coding_params params )
{
    // Per-thread partial XOR result; each thread owns the slice
    // [threadIdx.x*INTS_PER_THREAD, ...+INTS_PER_THREAD). No slice is
    // shared between threads, so no __syncthreads() is required.
    extern __shared__ int shared_mem[];

    int num_packets = params.max_row_weight;
    int p_size = params.packet_size / sizeof(int);   // packet size in ints

    // Shared memory is UNDEFINED on kernel entry -- it must be zeroed
    // before accumulating. (The original version instead reset it after
    // use, assuming contents persist across launches, which CUDA does
    // not guarantee.)
    for ( int j = 0; j < INTS_PER_THREAD; j++ )
        shared_mem [ threadIdx.x*INTS_PER_THREAD + j ] = 0;

    // XOR every referenced source packet into the partial result.
    // Row stride is num_packets+2: max_row_weight data slots plus two
    // staircase-matrix slots (see make_compact_pcm).
    for ( int i = 0; i < num_packets; i++ )
    {
        int index = pcm [ blockIdx.x*(num_packets+2) + i ];

        // Only indices in [0, k) name source packets; -1 is an unused
        // slot and indices >= k belong to the staircase part.
        if ( index > -1 && index < params.k )
        {
            for ( int j = 0; j < INTS_PER_THREAD; j++ )
            {
                shared_mem [ threadIdx.x*INTS_PER_THREAD + j ] ^=
                    data [ index*p_size + threadIdx.x*INTS_PER_THREAD + j ];
            }
        }
    }

    // Publish this block's parity packet. Each thread writes only the
    // ints it accumulated itself, so no barrier is needed.
    for ( int j = 0; j < INTS_PER_THREAD; j++ )
    {
        parity [ blockIdx.x*p_size + threadIdx.x*INTS_PER_THREAD + j ] =
            shared_mem [ threadIdx.x*INTS_PER_THREAD + j ];
    }
}
| 68 | + |
/*
 * Decoding kernel -- NOT IMPLEMENTED.
 *
 * The entire body below is commented-out work in progress; as compiled,
 * this kernel performs no work. The sketch suggests the plan was: load
 * each block's row of the compact parity-check matrix into shared memory,
 * XOR the surviving neighbour packets to recover lost ones, and write the
 * repaired packets back into `received` (NOTE(review): the draft even
 * contains a typo, `num_beighbours`, and references an undeclared `pkts`
 * array -- it would not compile if uncommented as-is).
 */
__global__ void decode ( int* received, int* error_vec, int* pcm, struct coding_params params )
{
/* extern __shared__ int shared_mem[];
 *
 * int num_neighbours = params.max_row_weight + 2;
 *
 * int p_size = params.packet_size/sizeof(int);
 * int num_threads = p_size/4;
 *
 * //load the neighbouring packets into shared memory
 * int idx = blockIdx.x*blockDim.x + threadIdx.x;
 * for ( int i = 0; i < num_beighbours; i++ )
 * {
 *     if ( threadIdx.x == 0 )
 *         shared_mem[i] = pcm [ blockIdx.x*num_beighbours + i ];
 *
 *     __syncthreads();
 *
 *     if ( shared_mem[i] != -1 )
 *     {
 * //        shared_mem [ num_neighbours + threadIdx.x*4 + i ] =
 *     }
 * }
 *
 * __syncthreads();
 * for ( int i = 0; i < (params.packet_size/sizeof(int))/num_threads; i++ )
 *     received [ (params.k + blockIdx.x)*p_size + threadIdx.x*4 + i] =
 *         pkts [ idx + i ];
 *
 * __syncthreads();
 */

// for ( int i = 0; i < params.packet_size/sizeof(int); i++ )
//     received [ params.k*p_size + blockIdx.x*p_size + idx + i ] = pkts [ idx + i ];

}
| 111 | + |
/*
 * Host-side driver for the encode kernel.
 *
 * source_data: k source packets (k * packet_size bytes, host memory)
 * parity:      output buffer for m parity packets (m * packet_size bytes)
 * pcm:         compact parity-check matrix, m rows of
 *              (max_row_weight + 2) ints (host memory)
 * params:      coding parameters (k, m, packet_size, max_row_weight)
 *
 * Copies inputs to the device, launches one block per parity packet,
 * and copies the computed parity back into `parity`. Errors are reported
 * to stdout but not propagated (interface kept as in the original).
 */
void gpu_encode ( char* source_data, char* parity, int* pcm, struct coding_params params )
{
    int* src_data_d;      // device copy of the k source packets
    int* parity_data_d;   // device buffer for the m parity packets
    int* pcm_d;           // device copy of the compact parity-check matrix
    short show_info = 0;  // set non-zero to dump device properties

    cudaError_t cuda_error;

    struct cudaDeviceProp dev_prop;

    cudaGetDeviceProperties (&dev_prop, 0);

    if (show_info)
    {
        if (!dev_prop.canMapHostMemory)
            printf("Cannot map host memory.\n");
        printf ( "name: %s\n", dev_prop.name );
        // divide BEFORE narrowing so totalGlobalMem > 4 GB is not truncated
        printf ( "totalGlobalMem: %u MB\n", (unsigned int)(dev_prop.totalGlobalMem/(1024*1024)) );
        printf ( "sharedMemPerBlock: %u kB\n", (unsigned int)(dev_prop.sharedMemPerBlock/1024) );
        printf ( "maxThreadsPerBlock: %d\n", dev_prop.maxThreadsPerBlock );
        printf ( "maxThreadsDim: %d\n", dev_prop.maxThreadsDim[0] );
        printf ( "maxThreadsDim: %d\n", dev_prop.maxThreadsDim[1] );
        printf ( "maxThreadsDim: %d\n", dev_prop.maxThreadsDim[2] );
        printf ( "maxGridSize: %d\n", dev_prop.maxGridSize[0] );
    }

    cuda_error = cudaMalloc ( (void**) &src_data_d, params.k*params.packet_size);
    if ( cuda_error != cudaSuccess )
        printf ( "cudaMalloc returned %s\n", cudaGetErrorString (cuda_error) );

    cuda_error = cudaMalloc ( (void**) &parity_data_d, params.m*params.packet_size);
    if ( cuda_error != cudaSuccess )
        printf ( "cudaMalloc returned %s\n", cudaGetErrorString (cuda_error) );

    cuda_error = cudaMemset ( parity_data_d, 0, params.m*params.packet_size);
    if ( cuda_error != cudaSuccess )
        printf ( "cudaMemset returned %s\n", cudaGetErrorString (cuda_error) );

    cuda_error = cudaMemcpy ( src_data_d, source_data, params.k*params.packet_size,
                              cudaMemcpyHostToDevice );
    if ( cuda_error != cudaSuccess )
        printf ( "cudaMemcpy returned %s\n", cudaGetErrorString (cuda_error) );

    // one row per parity packet, max_row_weight data slots + 2 staircase slots
    cuda_error = cudaMalloc ( (void**) &pcm_d, params.m*(params.max_row_weight+2)*sizeof(int));
    if ( cuda_error != cudaSuccess )
        printf ( "cudaMalloc returned %s\n", cudaGetErrorString (cuda_error) );

    cuda_error = cudaMemcpy ( pcm_d, pcm, sizeof(int)*params.m*(params.max_row_weight+2),
                              cudaMemcpyHostToDevice );
    if ( cuda_error != cudaSuccess )
        printf ( "cudaMemcpy returned %s\n", cudaGetErrorString (cuda_error) );

    cuda_error = cudaDeviceSynchronize();
    if ( cuda_error != cudaSuccess )
        printf ( "cudaDeviceSynchronize returned %s\n", cudaGetErrorString (cuda_error) );

    // each thread handles INTS_PER_THREAD ints of one packet;
    // one block computes one parity packet
    int block_size = (params.packet_size / sizeof(int))/INTS_PER_THREAD;
    int block_count = params.m;

    // dynamic shared memory: one packet's worth of partial results per block
    int num_bytes_shared = params.packet_size;

    encode <<< block_count, block_size, num_bytes_shared >>> (src_data_d, parity_data_d,
                                                              pcm_d, params );

    // launch-configuration errors surface here ...
    cuda_error = cudaGetLastError();
    if ( cuda_error != cudaSuccess )
        printf("kernel execution returned %s\n", cudaGetErrorString (cuda_error));

    // ... execution errors at the next sync (cudaThreadSynchronize() is
    // deprecated; cudaDeviceSynchronize() is its replacement)
    cudaDeviceSynchronize();

    cudaMemcpy ( parity, parity_data_d, params.m*params.packet_size, cudaMemcpyDeviceToHost );
    cuda_error = cudaGetLastError();
    if ( cuda_error != cudaSuccess )
        printf("cudaMemcpy from device returned %s\n", cudaGetErrorString (cuda_error));

    cudaFree(src_data_d);
    cudaFree(parity_data_d);
    cudaFree(pcm_d);   // was leaked in the original version

}
| 200 | + |
/*
 * Host-side driver for the decode kernel -- NOT IMPLEMENTED.
 *
 * The entire body below is commented-out work in progress; as compiled,
 * this function does nothing and `received` is returned unmodified.
 * The sketch mirrors gpu_encode (allocate/copy device buffers, launch
 * `decode`, copy results back), but NOTE(review): it calls
 * make_compact_pcm with a two-argument signature that does not match the
 * three-argument prototype above, and copies `pcm` into `pcm_d` where the
 * error vector was presumably intended -- it would need fixing before
 * being uncommented.
 */
void gpu_decode ( char* received, int* error_vec, char* pc_matrix, struct coding_params params )
{
/* int* received_d;
 * int* pcm_d;
 * int* error_vec_d;
 * cudaError_t cuda_error;
 *
 * int k = params.k;
 * int m = params.m;
 * int packet_size = params.packet_size;
 *
 * int **pcm = make_compact_pcm ( pc_matrix, params );
 *
 * //allocate space and copy data to device
 * cuda_error = cudaMalloc ( (void**) &received_d, (k+m)*packet_size);
 * if ( cuda_error != cudaSuccess )
 *     printf ( "cudaMalloc return %d\n", cuda_error );
 *
 * cuda_error = cudaMemcpy ( received_d, received, (k+m)*packet_size, cudaMemcpyHostToDevice );
 * if ( cuda_error != cudaSuccess )
 *     printf ( "cudaMempcy return %d\n", cuda_error );
 *
 * cuda_error = cudaMalloc ( (void**) &pcm_d, m*params.max_row_weight*sizeof(int));
 * if ( cuda_error != cudaSuccess )
 *     printf ( "cudaMalloc return %d\n", cuda_error );
 *
 * cuda_error = cudaMemcpy ( pcm_d, pcm, sizeof(int)*m*params.max_row_weight,
 *                           cudaMemcpyHostToDevice );
 * if ( cuda_error != cudaSuccess )
 *     printf ( "cudaMempcy return %d\n", cuda_error );
 *
 * cuda_error = cudaMalloc ( (void**) &error_vec_d, params.num_lost*sizeof(int));
 * if ( cuda_error != cudaSuccess )
 *     printf ( "cudaMalloc return %d\n", cuda_error );
 *
 * cuda_error = cudaMemcpy ( pcm_d, pcm, params.num_lost*sizeof(int),
 *                           cudaMemcpyHostToDevice );
 * if ( cuda_error != cudaSuccess )
 *     printf ( "cudaMempcy pcm return %d\n", cuda_error );
 *
 * int block_size = (packet_size/sizeof(int)) / 4;
 * int block_count = m;
 * int shared_mem_size = (packet_size + sizeof(int))*(params.max_row_weight+2);
 *
 * decode <<< block_count, block_size, shared_mem_size >>> (received_d, error_vec, pcm_d, params );
 *
 * cuda_error = cudaMemcpy ( received, received_d, (k+m)*packet_size, cudaMemcpyDeviceToHost );
 * if ( cuda_error != cudaSuccess )
 *     printf ( "cudaMempcy from device return %d\n", cuda_error );
 *
 * cudaFree ( received_d );
 */
}
| 254 | + |
| 255 | +/* void make_compact_pcm ( char* pc_matrix, int* pcm, struct coding_params params) |
| 256 | + * { |
| 257 | + * //we need to create a compact representation of sparse pc_matrix |
| 258 | + * |
| 259 | + * int counter = 0; |
| 260 | + * int columns = params.max_row_weight + 2; |
| 261 | + * |
| 262 | + * for ( int i = 0; i < params.m; i++) { |
| 263 | + * for ( int j = 0; j < params.k; j++) |
| 264 | + * if ( pc_matrix[i*params.k + j] ) |
| 265 | + * { |
| 266 | + * pcm[i*columns + counter] = j; |
| 267 | + * counter++; |
| 268 | + * } |
| 269 | + * //add indices from staircase matrix |
| 270 | + * pcm[i*columns + counter] = params.k + i; |
| 271 | + * counter++; |
| 272 | + * |
| 273 | + * if ( i > 0 ) |
| 274 | + * { |
| 275 | + * pcm[i*columns + counter] = params.k + i - 1; |
| 276 | + * counter++; |
| 277 | + * } |
| 278 | + * |
| 279 | + * if ( counter < columns ) |
| 280 | + * for ( int j = counter; j < columns; j++) |
| 281 | + * pcm[i*columns + j] = -1; |
| 282 | + * counter = 0; |
| 283 | + * } |
| 284 | + * |
| 285 | + * |
| 286 | + * for ( int i = 0; i < params.m; i++) |
| 287 | + * { |
| 288 | + * for ( int j = 0; j < columns; j++ ) |
| 289 | + * printf ( "%d, ", pcm[i*columns + j] ); |
| 290 | + * printf ( "\n" ); |
| 291 | + * } |
| 292 | + * |
| 293 | + * |
| 294 | + * } |
| 295 | + */ |
0 commit comments