@@ -146,33 +146,35 @@ void benchmark_device(const Device_Info& device_info) {
146146 println (" \r | Memory Bandwidth (misaligned read ) " +alignr (29u , to_string (4 .0f *(float )N*(float )M/(float )(time_mr-time_cw/(double )M)*1E-9f , 2u ))+" GB/s |" );
147147 println (" \r | Memory Bandwidth (misaligned write) " +alignr (29u , to_string (4 .0f *(float )N*(float )M/(float ) time_mw *1E-9f , 2u ))+" GB/s |" );
148148
149- print (" | Benchmarking ... |" );
150- for (uint i=0u ; i<N_memory; i++) {
151- clock.start ();
152- buffer.write_to_device ();
153- time_send = fmin (clock.stop (), time_send);
154- }
155- const float bw_send = 4 .0f *M*N/(float )time_send*1E-9f ;
156- println (" \r | PCIe Bandwidth (send ) " +alignr (29u , to_string (bw_send, 2u ))+" GB/s |" );
157- print (" | Benchmarking ... |" );
158- for (uint i=0u ; i<N_memory; i++) {
159- clock.start ();
160- buffer.read_from_device ();
161- time_receive = fmin (clock.stop (), time_receive);
162- }
163- const float bw_receive = 4 .0f *M*N/(float )time_receive*1E-9f ;
164- println (" \r | PCIe Bandwidth ( receive ) " +alignr (29u , to_string (bw_receive, 2u ))+" GB/s |" );
165- print (" | Benchmarking ... |" );
166- for (uint i=0u ; i<N_memory; i++) {
167- clock.start ();
168- buffer.read_from_device (N*M/2u , N*M, false );
169- buffer.write_to_device (0u , N*M/2u , false );
170- buffer.finish_queue ();
171- time_bidirectional = fmin (clock.stop (), time_bidirectional);
149+ if (!device.info .uses_ram ) {
150+ print (" | Benchmarking ... |" );
151+ for (uint i=0u ; i<N_memory; i++) {
152+ clock.start ();
153+ buffer.write_to_device ();
154+ time_send = fmin (clock.stop (), time_send);
155+ }
156+ const float bw_send = 4 .0f *M*N/(float )time_send*1E-9f ;
157+ println (" \r | PCIe Bandwidth (send ) " +alignr (29u , to_string (bw_send, 2u ))+" GB/s |" );
158+ print (" | Benchmarking ... |" );
159+ for (uint i=0u ; i<N_memory; i++) {
160+ clock.start ();
161+ buffer.read_from_device ();
162+ time_receive = fmin (clock.stop (), time_receive);
163+ }
164+ const float bw_receive = 4 .0f *M*N/(float )time_receive*1E-9f ;
165+ println (" \r | PCIe Bandwidth ( receive ) " +alignr (29u , to_string (bw_receive, 2u ))+" GB/s |" );
166+ print (" | Benchmarking ... |" );
167+ for (uint i=0u ; i<N_memory; i++) {
168+ clock.start ();
169+ buffer.read_from_device (N*M/2u , N*M, false );
170+ buffer.write_to_device (0u , N*M/2u , false );
171+ buffer.finish_queue ();
172+ time_bidirectional = fmin (clock.stop (), time_bidirectional);
173+ }
174+ const float bw_bidirectional = 4 .0f *M*N/(float )time_bidirectional*1E-9f ;
175+ const float bw_max = fmax (2 .0f *fmax (bw_send, bw_receive), bw_bidirectional);
176+ println (" \r | PCIe Bandwidth ( bidirectional) (Gen" +to_string (bw_max>17 .6f ?4 :bw_max>8 .8f ?3 :bw_max>4 .4f ?2 :1 )+" x16)" +alignr (8u , to_string (bw_bidirectional, 2u ))+" GB/s |" );
172177 }
173- const float bw_bidirectional = 4 .0f *M*N/(float )time_bidirectional*1E-9f ;
174- const float bw_max = fmax (2 .0f *fmax (bw_send, bw_receive), bw_bidirectional);
175- println (" \r | PCIe Bandwidth ( bidirectional) (Gen" +to_string (bw_max>17 .6f ?4 :bw_max>8 .8f ?3 :bw_max>4 .4f ?2 :1 )+" x16)" +alignr (8u , to_string (bw_bidirectional, 2u ))+" GB/s |" );
176178
177179 println (" |-----------------------------------------------------------------------------|" );
178180}
0 commit comments