@@ -62,103 +62,112 @@ int main(int argc, char **argv)
6262 float matchingTime = 0.0 ;
6363 float ioReadTime = 0.0 ;
6464 float dataVerificationTime = 0.0 ;
65+ int data_verification_flag = 0 ;
66+ int iterations = 50 ; // Run 50 iterations to average out the initial heavy loading time in CUDA
6567
66- // Read images using OpenCV
67- cv::Mat limg, rimg;
68- auto ioRead_start = std::chrono::steady_clock::now ();
69- if (imgSet)
70- {
71- cv::imread (" ../../inputData/left.pgm" , 0 ).convertTo (limg, CV_32FC1);
72- cv::imread (" ../../inputData/righ.pgm" , 0 ).convertTo (rimg, CV_32FC1);
73- }
74- else
68+ auto setDevice_start = std::chrono::steady_clock::now ();
69+ cudaSetDevice (0 );
70+ auto setDevice_stop = std::chrono::steady_clock::now ();
71+ std::cout << " cudaSetDevice Time is " << std::chrono::duration<float , std::milli>(setDevice_stop - setDevice_start).count () << " ms" << std::endl;
72+
73+ for (int i = 0 ; i < iterations; ++i)
7574 {
76- cv::imread (" ../../inputData/img1.png" , 0 ).convertTo (limg, CV_32FC1);
77- cv::imread (" ../../inputData/img2.png" , 0 ).convertTo (rimg, CV_32FC1);
78- }
79- auto ioRead_stop = std::chrono::steady_clock::now ();
80- ioReadTime = std::chrono::duration<float , std::micro>(ioRead_stop - ioRead_start).count ();
75+ // Read images using OpenCV
76+ cv::Mat limg, rimg;
77+ auto ioRead_start = std::chrono::steady_clock::now ();
78+ if (imgSet)
79+ {
80+ cv::imread (" ../../inputData/left.pgm" , 0 ).convertTo (limg, CV_32FC1);
81+ cv::imread (" ../../inputData/righ.pgm" , 0 ).convertTo (rimg, CV_32FC1);
82+ }
83+ else
84+ {
85+ cv::imread (" ../../inputData/img1.png" , 0 ).convertTo (limg, CV_32FC1);
86+ cv::imread (" ../../inputData/img2.png" , 0 ).convertTo (rimg, CV_32FC1);
87+ }
88+ auto ioRead_stop = std::chrono::steady_clock::now ();
89+ ioReadTime = std::chrono::duration<float , std::micro>(ioRead_stop - ioRead_start).count ();
8190
82- unsigned int w = limg.cols ;
83- unsigned int h = limg.rows ;
84- std::cout << " Image size = (" << w << " ," << h << " )" << std::endl;
91+ unsigned int w = limg.cols ;
92+ unsigned int h = limg.rows ;
93+ std::cout << " Image size = (" << w << " ," << h << " )" << std::endl;
8594
86- // Initial Cuda images and download images to device
87- std::cout << " Initializing data..." << std::endl;
88- cudaSetDevice (0 );
89- CudaImage img1, img2;
95+ // Initialize CUDA images and download images to device
96+ std::cout << " Initializing data..." << std::endl;
97+ // cudaSetDevice(0);
98+ CudaImage img1, img2;
9099
91- img1.Allocate (w, h, iAlignUp (w, 128 ), false , imageInitTime, NULL , (float *)limg.data );
92- img2.Allocate (w, h, iAlignUp (w, 128 ), false , imageInitTime, NULL , (float *)rimg.data );
93- img1.Download (imageInitTime);
94- img2.Download (imageInitTime);
100+ img1.Allocate (w, h, iAlignUp (w, 128 ), false , imageInitTime, NULL , (float *)limg.data );
101+ img2.Allocate (w, h, iAlignUp (w, 128 ), false , imageInitTime, NULL , (float *)rimg.data );
102+ img1.Download (imageInitTime);
103+ img2.Download (imageInitTime);
95104
96- // Extract Sift features from images
97- SiftData siftData1, siftData2;
98- float initBlur = 1 .0f ;
99- float thresh = (imgSet ? 4 .5f : 2 .0f );
105+ // Extract Sift features from images
106+ SiftData siftData1, siftData2;
107+ float initBlur = 1 .0f ;
108+ float thresh = (imgSet ? 4 .5f : 2 .0f );
100109
101- InitSiftData (siftData1, imageInitTime, 32768 , true , true );
102- InitSiftData (siftData2, imageInitTime, 32768 , true , true );
110+ InitSiftData (siftData1, imageInitTime, 32768 , true , true );
111+ InitSiftData (siftData2, imageInitTime, 32768 , true , true );
103112
104- // A bit of benchmarking
105- // for (int thresh1=1.00f;thresh1<=4.01f;thresh1+=0.50f) {
106- float *memoryTmp = AllocSiftTempMemory (w, h, 5 , imageInitTime, false );
107- for (int i = 0 ; i < 50 ; i++)
108- {
109- float time = 0 .0f ; // set total time to init time
110- ExtractSift (siftData1, img1, 5 , initBlur, thresh, time, 0 .0f , false , memoryTmp);
111- extractSiftTime += time;
112- time = 0 .0f ;
113- ExtractSift (siftData2, img2, 5 , initBlur, thresh, time, 0 .0f , false , memoryTmp);
114- extractSiftTime += time;
115- }
116- FreeSiftTempMemory (memoryTmp);
113+ // A bit of benchmarking
114+ // for (int thresh1=1.00f;thresh1<=4.01f;thresh1+=0.50f) {
115+ float *memoryTmp = AllocSiftTempMemory (w, h, 5 , imageInitTime, false );
116+ for (int i = 0 ; i < 50 ; i++)
117+ {
118+ float time = 0 .0f ; // set total time to init time
119+ ExtractSift (siftData1, img1, 5 , initBlur, thresh, time, 0 .0f , false , memoryTmp);
120+ extractSiftTime += time;
121+ time = 0 .0f ;
122+ ExtractSift (siftData2, img2, 5 , initBlur, thresh, time, 0 .0f , false , memoryTmp);
123+ extractSiftTime += time;
124+ }
125+ FreeSiftTempMemory (memoryTmp);
117126
118- // Match Sift features and find a homography
119- for (int i = 0 ; i < 1 ; i++)
120- MatchSiftData (siftData1, siftData2, matchingTime);
121- float homography[9 ];
122- int numMatches;
123- FindHomography (siftData1, homography, &numMatches, matchingTime, 10000 , 0 .00f , 0 .80f , 5.0 );
124- int numFit = ImproveHomography (siftData1, homography, 5 , 0 .00f , 0 .80f , 3.0 );
125- float matchPercentage = 100 .0f * numFit / std::min (siftData1.numPts , siftData2.numPts );
127+ // Match Sift features and find a homography
128+ for (int i = 0 ; i < 1 ; i++)
129+ MatchSiftData (siftData1, siftData2, matchingTime);
130+ float homography[9 ];
131+ int numMatches;
132+ FindHomography (siftData1, homography, &numMatches, matchingTime, 10000 , 0 .00f , 0 .80f , 5.0 );
133+ int numFit = ImproveHomography (siftData1, homography, 5 , 0 .00f , 0 .80f , 3.0 );
134+ float matchPercentage = 100 .0f * numFit / std::min (siftData1.numPts , siftData2.numPts );
126135
127- std::cout << " Number of original features: " << siftData1.numPts << " " << siftData2.numPts << std::endl;
128- std::cout << " Number of matching features: " << numFit << " " << numMatches << " " << matchPercentage << " % " << initBlur << " " << thresh << " \n "
129- << std::endl;
136+ std::cout << " Number of original features: " << siftData1.numPts << " " << siftData2.numPts << std::endl;
137+ std::cout << " Number of matching features: " << numFit << " " << numMatches << " " << matchPercentage << " % " << initBlur << " " << thresh << " \n "
138+ << std::endl;
130139
131140#ifdef DEVICE_TIMER
132- totTime = imageInitTime + extractSiftTime + matchingTime;
141+ totTime = imageInitTime + extractSiftTime + matchingTime;
133142
134- std::cout << " Images initialization time = " << imageInitTime / 1000 << " ms" << std::endl;
135- std::cout << " Feature extraction time = " << extractSiftTime / 1000 << " ms" << std::endl;
136- std::cout << " Matching time = " << matchingTime / 1000 << " ms"
137- << " \n "
138- << std::endl;
139- std::cout << " Total Deivce Time = " << totTime / 1000 << " ms"
140- << " \n "
141- << std::endl;
143+ std::cout << " Images initialization time = " << imageInitTime / 1000 << " ms" << std::endl;
144+ std::cout << " Feature extraction time = " << extractSiftTime / 1000 << " ms" << std::endl;
145+ std::cout << " Matching time = " << matchingTime / 1000 << " ms"
146+ << " \n "
147+ << std::endl;
148+ std::cout << " Total Deivce Time = " << totTime / 1000 << " ms"
149+ << " \n "
150+ << std::endl;
142151#endif
143152
144- // data validation
145- auto dataVerficationTimer_start = std::chrono::steady_clock::now ();
146- int data_verification_flag = Utility::RunDataVerification (thresh, matchPercentage);
147- auto dataVerficationTimer_stop = std::chrono::steady_clock::now ();
148- dataVerificationTime = std::chrono::duration<float , std::micro>(dataVerficationTimer_stop - dataVerficationTimer_start).count ();
149- // // Print out and store summary data
150- // // PrintMatchData(siftData1, siftData2, img1);
151- // cv::imwrite("data/limg_pts.pgm", limg);
153+ // data validation
154+ auto dataVerficationTimer_start = std::chrono::steady_clock::now ();
155+ data_verification_flag = Utility::RunDataVerification (thresh, matchPercentage);
156+ auto dataVerficationTimer_stop = std::chrono::steady_clock::now ();
157+ dataVerificationTime + = std::chrono::duration<float , std::micro>(dataVerficationTimer_stop - dataVerficationTimer_start).count ();
158+ // // Print out and store summary data
159+ // // PrintMatchData(siftData1, siftData2, img1);
160+ // cv::imwrite("data/limg_pts.pgm", limg);
152161
153- // MatchAll(siftData1, siftData2, homography);
154-
155- // Free Sift data from device
156- FreeSiftData (siftData1);
157- FreeSiftData (siftData2);
162+ // MatchAll(siftData1, siftData2, homography);
158163
164+ // Free Sift data from device
165+ FreeSiftData (siftData1);
166+ FreeSiftData (siftData2);
167+ }
159168 auto totalProgTimer_end = std::chrono::steady_clock::now ();
160169 float totalProgramTime = std::chrono::duration<float , std::micro>(totalProgTimer_end - totalProgTimer_start).count () - ioReadTime - dataVerificationTime;
161- std::cout << " Total workload time = " << totalProgramTime / 1000 << " ms"
170+ std::cout << " Avg workload time = " << totalProgramTime / ( 1000 * iterations) << " ms"
162171 << " \n "
163172 << std::endl;
164173 return data_verification_flag;
0 commit comments