1010
1111#include < string.h>
1212
13- #include < CL /sycl.hpp>
13+ #include < sycl /sycl.hpp>
1414#include < cstdlib>
1515#include < memory>
1616#include < thread>
2424#define B_VALUE 0 .256f
2525#define MAX_EPS 1 .0e-4f
2626
27+ static bool verbose = false ;
28+
2729static float Check (const std::vector<float >& a, float value) {
2830 assert (value > MAX_EPS);
2931
@@ -86,7 +88,9 @@ static float RunAndCheck(sycl::queue queue, const std::vector<float>& a,
8688 throw ;
8789 }
8890
89- std::cout << " \t Matrix multiplication time: " << time << " sec" << std::endl;
91+ if (verbose) {
92+ std::cout << " \t Matrix multiplication time: " << time << " sec" << std::endl;
93+ }
9094
9195 return Check (c, expected_result);
9296}
@@ -97,8 +101,10 @@ static void Compute(sycl::queue queue, const std::vector<float>& a,
97101 float expected_result) {
98102 for (unsigned i = 0 ; i < repeat_count; ++i) {
99103 float eps = RunAndCheck (queue, a, b, c, size, expected_result);
100- std::cout << " Results are " << ((eps < MAX_EPS) ? " " : " IN" )
104+ if (verbose) {
105+ std::cout << " Results are " << ((eps < MAX_EPS) ? " " : " IN" )
101106 << " CORRECT with accuracy: " << eps << std::endl;
107+ }
102108 }
103109}
104110
@@ -122,45 +128,61 @@ const unsigned max_thread_count = 64;
122128const unsigned max_size = 8192 ;
123129const unsigned min_size = 32 ;
124130
131+ const unsigned default_size = 1024 ;
132+ const unsigned default_thread_count = 2 ;
133+ const unsigned default_repetition_per_thread = 4 ;
134+
125135void Usage (const char * name) {
126136
127- std::cout << " Calculating floating point matrix multiply on gpu, submitting the work from many CPU threads\n " ;
128- std::cout << name << " [ [number of threads, default=2, max=" << max_thread_count
129- << " ], [matrix size, default=1024, max=" << max_size << " ], [repetition count, default=4]] \n " ;
137+ std::cout << " Calculating floating point matrix multiply on gpu, submitting the work from many CPU threads\n "
138+ << " Usage " << name << " [ options ]" << std::endl;
139+ std::cout <<
140+ " --threads [-t] integer " <<
141+ " Threads number, default: " << default_thread_count << std::endl;
142+ std::cout <<
143+ " --size [-s] integer " <<
144+ " Matrix size, default: " << default_size << std::endl;
145+ std::cout <<
146+ " --repeat [-r] integer " <<
147+ " Repetition number per thread, default: " << default_repetition_per_thread << std::endl;
148+ std::cout <<
149+ " --verbose [-v] " <<
150+ " Enable verbose mode to report the app progress, default: off" << std::endl;
130151}
131152
132153int main (int argc, char * argv[]) {
133154
134155 int exit_code = EXIT_SUCCESS;
135- unsigned thread_count = 2 ;
136- unsigned repeat_count = 4 ;
137- unsigned size = 1024 ;
138-
139- if (argc == 2 &&
140- ( strcmp (argv[1 ], " -?" ) == 0 or strcmp (argv[1 ], " -h" ) == 0 or strcmp (argv[1 ], " --help" ) == 0 ) ){
141- Usage (argv[0 ]);
142- return EXIT_SUCCESS;
143- }
156+ unsigned thread_count = default_thread_count;
157+ unsigned repeat_count = default_repetition_per_thread;
158+ unsigned size = default_size;
144159
145160 try {
146161 unsigned temp;
147- if (argc > 1 ) {
148- temp = std::stoul (argv[1 ]);
149- thread_count = (temp < 1 ) ? 1 :
150- (temp > max_thread_count) ? max_thread_count : temp;
151- }
152- if (argc > 2 ) {
153- temp = std::stoul (argv[2 ]);
154- size = (temp < min_size) ? min_size :
155- (temp > max_size) ? max_size : temp;
156- }
157-
158- if (argc > 3 ) {
159- temp = std::stoul (argv[3 ]);
160- repeat_count = (temp < 1 ) ? 1 : temp;
162+ for (uint32_t i=1 ; i < argc; i++) {
163+ if (strcmp (argv[i], " -s" ) == 0 || strcmp (argv[i], " --size" ) == 0 ){
164+ i++;
165+ temp = std::stoul (argv[i]);
166+ size = (temp < min_size) ? min_size : (temp > max_size) ? max_size : temp;
167+ } else if (strcmp (argv[i], " -t" ) == 0 || strcmp (argv[i], " --threads" ) == 0 ){
168+ i++;
169+ temp = std::stoul (argv[i]);
170+ thread_count = (temp < 1 ) ? 1 : (temp > max_thread_count) ? max_thread_count : temp;
171+ } else if (strcmp (argv[i], " -r" ) == 0 || strcmp (argv[i], " --repeat" ) == 0 ){
172+ i++;
173+ temp = std::stoul (argv[i]);
174+ repeat_count = (temp < 1 ) ? 1 : temp;
175+ } else if (strcmp (argv[i], " -v" ) == 0 || strcmp (argv[i], " --verbose" ) == 0 ){
176+ // verbosity off makes minimal the sample self output -
177+ // so profiling output won't be intermixed with the sample output
178+ // and could be analyzed by tests
179+ verbose = true ;
180+ } else {
181+ Usage (argv[0 ]);
182+ return EXIT_SUCCESS;
183+ }
161184 }
162185 }
163-
164186 catch (...) {
165187 Usage (argv[0 ]);
166188 return EXIT_FAILURE;
@@ -328,15 +350,17 @@ int main(int argc, char* argv[]) {
328350 auto end = std::chrono::steady_clock::now ();
329351 std::chrono::duration<float > time = end - start;
330352
331- std::cout << " \t -- Total execution time: " << time.count () << " sec" << std::endl;
353+ if (verbose) {
354+ std::cout << " \t -- Total execution time: " << time.count () << " sec" << std::endl;
355+ }
332356 };
333357
334358 std::cout << " DPC++ Matrix Multiplication (CPU threads: " << thread_count << " , matrix size: " << size << " x "
335359 << size << " , repeats: " << repeat_count << " times)" << std::endl;
336360 std::cout << " Target device: "
337361 << queue.get_info <sycl::info::queue::device>()
338362 .get_info <sycl::info::device::name>()
339- << std::endl;
363+ << std::endl << std::flush ;
340364
341365 std::vector<std::thread> the_threads;
342366 for (unsigned i=0 ; i<thread_count; i++) {
0 commit comments