@@ -16,7 +16,7 @@ public:
1616 const std::string DriverVersion = Device.get_info <device::driver_version>();
1717
1818 if (Device.is_gpu () && (DriverVersion.find (" CUDA" ) != std::string::npos)) {
19- std::cout << " CUDA device found " << std::endl ;
19+ std::cout << " CUDA device found \n " ;
2020 return 1 ;
2121 };
2222 return -1 ;
@@ -41,23 +41,23 @@ int main(int argc, char *argv[]) {
4141 // Create a SYCL context for interoperability with CUDA Runtime API
4242 // This is temporary until the property extension is implemented
4343 const bool UsePrimaryContext = true ;
44- sycl:: device dev{CUDASelector ().select_device ()};
45- sycl:: context myContext{dev, {}, UsePrimaryContext};
46- sycl:: queue myQueue{myContext, dev};
44+ device dev{CUDASelector ().select_device ()};
45+ context myContext{dev, {}, UsePrimaryContext};
46+ queue myQueue{myContext, dev};
4747
4848 {
4949 buffer<double > bA{range<1 >(n)};
5050 buffer<double > bB{range<1 >(n)};
5151 buffer<double > bC{range<1 >(n)};
5252
5353 {
54- auto h_a = bA.get_access <access::mode::write>();
55- auto h_b = bB.get_access <access::mode::write>();
54+ auto hA = bA.get_access <access::mode::write>();
55+ auto hB = bB.get_access <access::mode::write>();
5656
5757 // Initialize vectors on host
5858 for (int i = 0 ; i < n; i++) {
59- h_a [i] = sin (i) * sin (i);
60- h_b [i] = cos (i) * cos (i);
59+ hA [i] = sin (i) * sin (i);
60+ hB [i] = cos (i) * cos (i);
6161 }
6262 }
6363
@@ -68,28 +68,29 @@ int main(int argc, char *argv[]) {
6868 auto accC = bC.get_access <access::mode::write>(h);
6969
7070 h.interop_task ([=](interop_handler ih) {
71- auto d_a = reinterpret_cast <double *>(ih.get_mem <backend::cuda>(accA));
72- auto d_b = reinterpret_cast <double *>(ih.get_mem <backend::cuda>(accB));
73- auto d_c = reinterpret_cast <double *>(ih.get_mem <backend::cuda>(accC));
71+ auto dA = reinterpret_cast <double *>(ih.get_mem <backend::cuda>(accA));
72+ auto dB = reinterpret_cast <double *>(ih.get_mem <backend::cuda>(accB));
73+ auto dC = reinterpret_cast <double *>(ih.get_mem <backend::cuda>(accC));
7474
7575 int blockSize, gridSize;
7676 // Number of threads in each thread block
7777 blockSize = 1024 ;
7878 // Number of thread blocks in grid
79- gridSize = ( int ) ceil (( float )n / blockSize);
79+ gridSize = static_cast < int >( ceil (static_cast < float >(n) / blockSize) );
8080 // Call the CUDA kernel directly from SYCL
81- vecAdd<<<gridSize, blockSize>>> (d_a, d_b, d_c , n);
81+ vecAdd<<<gridSize, blockSize>>> (dA, dB, dC , n);
8282 });
8383 });
8484
8585 {
86- auto h_c = bC.get_access <access::mode::read>();
86+ auto hC = bC.get_access <access::mode::read>();
8787 // Sum up vector c and print result divided by n, this should equal 1 within
8888 // error
8989 double sum = 0 ;
90- for (int i = 0 ; i < n; i++)
91- sum += h_c[i];
92- printf (" final result: %f\n " , sum / n);
90+ for (int i = 0 ; i < n; i++) {
91+ sum += hC[i];
92+ }
93+ std::cout << " Final result " << sum / n << std::endl;
9394 }
9495 }
9596
0 commit comments