@@ -50,8 +50,8 @@ decltype(&clReadStream) xcl::Stream::readStream = nullptr;
5050decltype (&clWriteStream) xcl::Stream::writeStream = nullptr;
5151decltype (&clPollStreams) xcl::Stream::pollStreams = nullptr;
5252
53- auto constexpr c_test_size = 256 * 1024 * 1024 ; // 256 MB data
54- auto constexpr num_streams = 32 ;
53+ auto constexpr Block_Size = 256 * 1024 ; // 256 K integer per block
54+ auto constexpr num_of_Blocks = 1024 ;
5555
5656// //////////////////RESET FUNCTION//////////////////////////////////
5757int reset (int *a, int *sw_results, int *hw_results, unsigned int size) {
@@ -68,23 +68,24 @@ bool verify(int *sw_results, int *hw_results, int size) {
6868 for (int i = 0 ; i < size; i++) {
6969 if (sw_results[i] != hw_results[i]) {
7070 match = false ;
71+ std::cout << i << " \n " << sw_results[i] << " \n " << hw_results[i] << " \n " ;
7172 break ;
72- std::cout << sw_results[i] << " \n " << hw_results[i] << " \n " ;
7373 }
7474 }
7575 std::cout << " TEST " << (match ? " PASSED" : " FAILED" ) << std::endl;
7676 return match;
7777}
7878// //////MAIN FUNCTION//////////
7979int main (int argc, char **argv) {
80- unsigned int size = c_test_size;
8180
82- if ( xcl::is_hw_emulation ()) {
83- size = 4096 ; // 4KB for HW emulation
84- } else if (xcl::is_emulation ()) {
85- size = 2 * 1024 * 1024 ; // 2MB for sw emulation
81+ unsigned int num_Blocks = num_of_Blocks;
82+
83+ if (xcl::is_emulation ()) {
84+ num_Blocks = 2 ;
8685 }
8786
87+ unsigned int size = num_Blocks * Block_Size;
88+
8889 // I/O Data Vectors
8990 std::vector<int , aligned_allocator<int >> h_a (size);
9091 std::vector<int , aligned_allocator<int >> hw_results (size);
@@ -100,14 +101,13 @@ int main(int argc, char **argv) {
100101 auto binaryFile = argv[1 ];
101102 std::cout << " \n Vector Addition of elements " << size << std::endl;
102103
103- // Bytes per CU Stream
104- int vector_size_bytes = sizeof (int ) * size / num_streams ;
104+ // Bytes per Block
105+ int vector_size_bytes = sizeof (int ) * Block_Size ;
105106
106107 // OpenCL Host Code Begins
107108 cl_int err;
108- std::string cu_id;
109109 cl::Kernel increment;
110- int no_of_elem = size / num_streams ;
110+ int no_of_elem = Block_Size ;
111111 cl::CommandQueue q;
112112 cl::Context context;
113113 cl::Device device;
@@ -156,8 +156,8 @@ int main(int argc, char **argv) {
156156 xcl::Stream::init (platform_id);
157157
158158 // Streams
159- cl_stream h2c_stream_a ;
160- cl_stream c2h_stream ;
159+ cl_stream h2c_Stream_a ;
160+ cl_stream c2h_Stream ;
161161
162162 cl_int ret;
163163
@@ -169,17 +169,17 @@ int main(int argc, char **argv) {
169169 // Create write stream for argument 0 of kernel
170170 ext.flags = 0 ;
171171 OCL_CHECK (ret,
172- h2c_stream_a = xcl::Stream::createStream (
172+ h2c_Stream_a = xcl::Stream::createStream (
173173 device.get (), XCL_STREAM_READ_ONLY, CL_STREAM, &ext, &ret));
174174
175175 // Create read stream for argument 1 of kernel
176176 ext.flags = 1 ;
177- OCL_CHECK (ret, c2h_stream = xcl::Stream::createStream (device.get (),
177+ OCL_CHECK (ret, c2h_Stream = xcl::Stream::createStream (device.get (),
178178 XCL_STREAM_WRITE_ONLY,
179179 CL_STREAM, &ext, &ret));
180180
181181 // Sync for the async streaming
182- int num_compl = 2 * num_streams ;
182+ int num_compl = 2 * num_Blocks ;
183183
184184 // Checking the request completions
185185 cl_streams_poll_req_completions *poll_req;
@@ -194,28 +194,28 @@ int main(int argc, char **argv) {
194194 wr_req.flags = CL_STREAM_EOT | CL_STREAM_NONBLOCKING;
195195
196196 auto total_start = std::chrono::high_resolution_clock::now ();
197- for (int i = 0 ; i < num_streams ; i++) {
197+ for (unsigned int i = 0 ; i < num_Blocks ; i++) {
198198
199199 auto write_tag_a = " write_a_" + std::to_string (i);
200200 wr_req.priv_data = (void *)write_tag_a.c_str ();
201201
202- std::cout << " \n Writing Stream h2c_stream_a [" << i << " ]" ;
203- OCL_CHECK (ret, xcl::Stream::writeStream (h2c_stream_a ,
202+ std::cout << " \n Writing Block h2c_Stream_a [" << i << " ]" ;
203+ OCL_CHECK (ret, xcl::Stream::writeStream (h2c_Stream_a ,
204204 (h_a.data () + i * no_of_elem),
205205 vector_size_bytes, &wr_req, &ret));
206206
207207 auto read_tag = " read_" + std::to_string (i);
208208 rd_req.priv_data = (void *)read_tag.c_str ();
209209
210- std::cout << " \n Reading Stream c2h_stream [" << i << " ]" ;
211- OCL_CHECK (ret, xcl::Stream::readStream (c2h_stream ,
210+ std::cout << " \n Reading Block c2h_Stream [" << i << " ]" ;
211+ OCL_CHECK (ret, xcl::Stream::readStream (c2h_Stream ,
212212 (hw_results.data () + i * no_of_elem),
213213 vector_size_bytes, &rd_req, &ret));
214214 }
215215
216216 // Checking the request completions
217217 std::cout << " \n clPollStreams for (" << num_compl
218- << " ) events (CU : " << num_streams
218+ << " ) events (Blocks : " << num_Blocks
219219 << " , axis_in: 1, axis_out: 1)\n " ;
220220 OCL_CHECK (ret, xcl::Stream::pollStreams (device.get (), poll_req, num_compl,
221221 num_compl, &num_compl, 50000 , &ret));
@@ -224,15 +224,18 @@ int main(int argc, char **argv) {
224224 auto total_end = std::chrono::high_resolution_clock::now ();
225225 auto duration =
226226 std::chrono::duration<double , std::nano>(total_end - total_start);
227- double throput = ((double )size * sizeof (double )) / (double )duration.count ();
228- std::cout << " [ Case: 1 ] -> Throughput = " << throput << " GB/s\n " ;
227+ double throput =
228+ ((double )size * sizeof (int ) * 2 ) /
229+ (double )
230+ duration.count (); // Multiplied by 2 because read and write both done
231+ std::cout << " Throughput = " << throput << " GB/s\n " ;
229232
230233 // Compare the device results with software results
231234 bool match = verify (sw_results.data (), hw_results.data (), size);
232235
233236 // Releasing all OpenCL objects
234- xcl::Stream::releaseStream (c2h_stream );
235- xcl::Stream::releaseStream (h2c_stream_a );
237+ xcl::Stream::releaseStream (c2h_Stream );
238+ xcl::Stream::releaseStream (h2c_Stream_a );
236239
237240 return match ? EXIT_SUCCESS : EXIT_FAILURE;
238241}
0 commit comments