77 */
88
99#include < cstdio>
10+ #include < sstream>
11+ #include < fstream>
1012#include < stream_compaction/cpu.h>
1113#include < stream_compaction/naive.h>
1214#include < stream_compaction/efficient.h>
@@ -24,7 +26,49 @@ int* bookArraya = new int[8]{ 3, 1, 7, 0 ,4 ,1 ,6, 3 };
2426int * bookArrayb = new int [8 ]{};
2527const int BOOK_SIZE = 8 ;
2628
// File-scope cache of properties for the GPU queried at the start of main().
// All of these are filled in by main() from a cudaDeviceProp structure.

// Multi-line, human-readable device summary; assembled in main() with an
// std::ostringstream and printed to std::cout.
29+ std::string deviceName;
// Copied from cudaDeviceProp::maxThreadsPerBlock.
30+ int deviceMaxThreadsPerBlock;
// Copied from cudaDeviceProp::sharedMemPerBlock; reported in bytes by main().
// NOTE(review): the CUDA field is presumably size_t, so this assignment
// narrows to int -- confirm against the CUDA runtime headers.
31+ int deviceSharedMemPerBlock;
// Copied from cudaDeviceProp::maxThreadsPerMultiProcessor.
32+ int deviceMaxThreadsPerSM;
// Copied from cudaDeviceProp::maxBlocksPerMultiProcessor.
33+ int deviceMaxBlocksPerSM;
34+
2735int main (int argc, char * argv[]) {
36+ cudaDeviceProp deviceProp;
37+ int gpuDevice = 0 ;
38+ int device_count = 0 ;
39+ cudaGetDeviceCount (&device_count);
40+ if (gpuDevice > device_count) {
41+ std::cout
42+ << " Error: GPU device number is greater than the number of devices!"
43+ << " Perhaps a CUDA-capable GPU is not installed?"
44+ << std::endl;
45+ return false ;
46+ }
47+ cudaGetDeviceProperties (&deviceProp, gpuDevice);
48+ int major = deviceProp.major ;
49+ int minor = deviceProp.minor ;
50+ deviceMaxThreadsPerBlock = deviceProp.maxThreadsPerBlock ;
51+ deviceSharedMemPerBlock = deviceProp.sharedMemPerBlock ;
52+ deviceMaxThreadsPerSM = deviceProp.maxThreadsPerMultiProcessor ;
53+ deviceMaxBlocksPerSM = deviceProp.maxBlocksPerMultiProcessor ;
54+
55+
56+
57+ std::ostringstream ss;
58+ ss << " [SM " << major << " ." << minor << " " << deviceProp.name << " ]"
59+ << " \n Max threads per block: " << deviceMaxThreadsPerBlock
60+ << " \n Shared memory per block: " << deviceSharedMemPerBlock << " bytes"
61+ // << "\n Shared memory in each block can fit " << deviceSharedMemPerBlock / sizeof(int) << " number of integers"
62+ << " \n Max threads per SM: " << deviceMaxThreadsPerSM
63+ << " \n Max blocks per SM: " << deviceMaxBlocksPerSM
64+ << " \n Max grid size: " << deviceProp.maxGridSize [0 ] << " , "
65+ << deviceProp.maxGridSize [1 ] << " , " << deviceProp.maxGridSize [2 ];
66+
67+
68+ deviceName = ss.str ();
69+
70+ std::cout << deviceName << ' \n ' ;
71+
2872 // Scan tests
2973
3074 printf (" \n " );
@@ -66,14 +110,15 @@ int main(int argc, char* argv[]) {
66110
67111 printf (" \n " );
68112
69- # if 0
113+
70114 zeroArray (SIZE, c);
71115 printDesc (" naive scan, power-of-two" );
72116 StreamCompaction::Naive::scan (SIZE, c, a);
73117 printElapsedTime (StreamCompaction::Naive::timer ().getGpuElapsedTimeForPreviousOperation (), " (CUDA Measured)" );
74- // printArray(SIZE, c, true);
118+ printArray (SIZE, c, true );
75119 printCmpResult (SIZE, b, c);
76120
121+ #if 0
77122 /* For bug-finding only: Array of 1s to help find bugs in stream compaction or scan
78123 onesArray(SIZE, c);
79124 printDesc("1s array for finding bugs");
0 commit comments