4040#include < complex>
4141#include < chrono>
4242#include " include/fit_tsne.h"
43+ #include " verify.hpp"
4344
4445// #ifndef DEBUG_TIME
4546// #define DEBUG_TIME
6869#define PRINT_IL_TIMER (x ) std::cout << #x << " : " << ((float )x.count()) / 1000000.0 << " s" << std::endl
6970#endif
7071
71- double tsnecuda::RunTsne (tsnecuda::Options& opt)
72+ double tsnecuda::RunTsne (tsnecuda::Options& opt, int & success )
7273{
7374 std::chrono::steady_clock::time_point time_start_;
7475 std::chrono::steady_clock::time_point time_end_;
@@ -420,14 +421,15 @@ double tsnecuda::RunTsne(tsnecuda::Options& opt)
420421 std::cout << " done." << std::endl;
421422 }
422423
423- // int fft_dimensions[2] = {n_fft_coeffs, n_fft_coeffs}; // {780, 780}
424+ int fft_dimensions[2 ] = {n_fft_coeffs, n_fft_coeffs}; // {780, 780}
425+ size_t work_size_idft, work_size_dft;
424426
425- // std::int64_t fwd_strides1[3] = {0, n_fft_coeffs, 1}; // {0, 780, 1} -> 0 + 780*i + j
426- // std::int64_t fwd_strides2[3] = {0, (n_fft_coeffs/2+1)*2, 1}; // {0, 780, 1} -> 0 + 780*i + j
427- // std::int64_t bwd_strides[3] = {0, (n_fft_coeffs/2+1), 1}; // {0, 391, 1} -> 0 + 391*i + j
428- // std::int64_t fwd_distances1 = n_fft_coeffs* n_fft_coeffs;
429- // std::int64_t fwd_distances2 = n_fft_coeffs*(n_fft_coeffs/2+1)*2;
430- // std::int64_t bwd_distances = n_fft_coeffs*(n_fft_coeffs/2+1) ;
427+ std::int64_t fwd_strides1[3 ] = {0 , n_fft_coeffs, 1 }; // {0, 780, 1} -> 0 + 780*i + j
428+ std::int64_t fwd_strides2[3 ] = {0 , (n_fft_coeffs/2 +1 )*2 , 1 }; // {0, 780, 1} -> 0 + 780*i + j
429+ std::int64_t bwd_strides[3 ] = {0 , (n_fft_coeffs/2 +1 ), 1 }; // {0, 391, 1} -> 0 + 391*i + j
430+ std::int64_t fwd_distances1 = n_fft_coeffs* n_fft_coeffs;
431+ std::int64_t fwd_distances2 = n_fft_coeffs*(n_fft_coeffs/2 +1 )*2 ;
432+ std::int64_t bwd_distances = n_fft_coeffs*(n_fft_coeffs/2 +1 ) ;
431433
432434 // std::cout << "Setting up dft plans...\n";
433435 // // *** TIMED SEPARATELY. NOT ADDED TO PERF TIME ***
@@ -443,27 +445,66 @@ double tsnecuda::RunTsne(tsnecuda::Options& opt)
443445 // TIME_SINCE(time_start);
444446
445447 // TIME_START();
446- // std::shared_ptr<descriptor_t> plan_dft;
447- // plan_dft = std::make_shared<descriptor_t>(std::vector<std::int64_t>{n_fft_coeffs, n_fft_coeffs});
448- // plan_dft->set_value(oneapi::mkl::dft::config_param::PLACEMENT, DFTI_CONFIG_VALUE::DFTI_NOT_INPLACE);
449- // plan_dft->set_value(oneapi::mkl::dft::config_param::INPUT_STRIDES, fwd_strides1);
450- // plan_dft->set_value(oneapi::mkl::dft::config_param::OUTPUT_STRIDES, bwd_strides);
451- // plan_dft->set_value(oneapi::mkl::dft::config_param::FWD_DISTANCE, fwd_distances1);
452- // plan_dft->set_value(oneapi::mkl::dft::config_param::BWD_DISTANCE, bwd_distances);
453- // plan_dft->set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, n_terms);
454- // plan_dft->commit(qts);
455- // TIME_SINCE(time_start);
456448
449+
450+ #if defined(USE_NVIDIA_BACKEND)
451+ cufftHandle plan_dft;
452+ CufftSafeCall (cufftCreate (&plan_dft));
453+ CufftSafeCall (cufftMakePlanMany (
454+ plan_dft,
455+ 2 ,
456+ fft_dimensions,
457+ NULL ,
458+ 1 ,
459+ n_fft_coeffs * n_fft_coeffs,
460+ NULL ,
461+ 1 ,
462+ n_fft_coeffs * (n_fft_coeffs / 2 + 1 ),
463+ CUFFT_R2C,
464+ n_terms,
465+ &work_size_dft)
466+ );
467+ #else
468+ std::shared_ptr<descriptor_t > plan_dft;
469+ plan_dft = std::make_shared<descriptor_t >(std::vector<std::int64_t >{n_fft_coeffs, n_fft_coeffs});
470+ plan_dft->set_value (oneapi::mkl::dft::config_param::PLACEMENT, DFTI_CONFIG_VALUE::DFTI_NOT_INPLACE);
471+ plan_dft->set_value (oneapi::mkl::dft::config_param::INPUT_STRIDES, fwd_strides1);
472+ plan_dft->set_value (oneapi::mkl::dft::config_param::OUTPUT_STRIDES, bwd_strides);
473+ plan_dft->set_value (oneapi::mkl::dft::config_param::FWD_DISTANCE, fwd_distances1);
474+ plan_dft->set_value (oneapi::mkl::dft::config_param::BWD_DISTANCE, bwd_distances);
475+ plan_dft->set_value (oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, n_terms);
476+ plan_dft->commit (qts);
477+ #endif
478+ // TIME_SINCE(time_start);
457479 // TIME_START();
458- // std::shared_ptr<descriptor_t> plan_idft;
459- // plan_idft = std::make_shared<descriptor_t>(std::vector<std::int64_t>{n_fft_coeffs, n_fft_coeffs});
460- // plan_idft->set_value(oneapi::mkl::dft::config_param::PLACEMENT, DFTI_CONFIG_VALUE::DFTI_NOT_INPLACE);
461- // plan_idft->set_value(oneapi::mkl::dft::config_param::INPUT_STRIDES, bwd_strides);
462- // plan_idft->set_value(oneapi::mkl::dft::config_param::OUTPUT_STRIDES, fwd_strides2);
463- // plan_idft->set_value(oneapi::mkl::dft::config_param::FWD_DISTANCE, fwd_distances2);
464- // plan_idft->set_value(oneapi::mkl::dft::config_param::BWD_DISTANCE, bwd_distances);
465- // plan_idft->set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, n_terms);
466- // plan_idft->commit(qts);
480+ #if defined(USE_NVIDIA_BACKEND)
481+ cufftHandle plan_idft;
482+ CufftSafeCall (cufftCreate (&plan_idft));
483+ CufftSafeCall (cufftMakePlanMany (
484+ plan_idft,
485+ 2 ,
486+ fft_dimensions,
487+ NULL ,
488+ 1 ,
489+ n_fft_coeffs * (n_fft_coeffs / 2 + 1 ),
490+ NULL ,
491+ 1 ,
492+ n_fft_coeffs * n_fft_coeffs,
493+ CUFFT_C2R,
494+ n_terms,
495+ &work_size_idft)
496+ );
497+ #else
498+ std::shared_ptr<descriptor_t > plan_idft;
499+ plan_idft = std::make_shared<descriptor_t >(std::vector<std::int64_t >{n_fft_coeffs, n_fft_coeffs});
500+ plan_idft->set_value (oneapi::mkl::dft::config_param::PLACEMENT, DFTI_CONFIG_VALUE::DFTI_NOT_INPLACE);
501+ plan_idft->set_value (oneapi::mkl::dft::config_param::INPUT_STRIDES, bwd_strides);
502+ plan_idft->set_value (oneapi::mkl::dft::config_param::OUTPUT_STRIDES, fwd_strides1);
503+ plan_idft->set_value (oneapi::mkl::dft::config_param::FWD_DISTANCE, fwd_distances1);
504+ plan_idft->set_value (oneapi::mkl::dft::config_param::BWD_DISTANCE, bwd_distances);
505+ plan_idft->set_value (oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, n_terms);
506+ plan_idft->commit (qts);
507+ #endif
467508 // // *** TIMED SEPARATELY. NOT ADDED TO PERF TIME ***
468509 // TIME_SINCE(time_start);
469510 // std::cout << "done.\n";
@@ -564,8 +605,8 @@ double tsnecuda::RunTsne(tsnecuda::Options& opt)
564605#endif
565606
566607 tsnecuda::NbodyFFT2D (
567- // plan_dft,
568- // plan_idft,
608+ plan_dft,
609+ plan_idft,
569610 fft_kernel_tilde_device, // input
570611 fft_w_coefficients, // intermediate value
571612 N,
@@ -723,6 +764,9 @@ double tsnecuda::RunTsne(tsnecuda::Options& opt)
723764 dump_file << host_ys[i] << " " << host_ys[i + num_points] << std::endl;
724765 }
725766 dump_file.close ();
767+
768+ std::string golden_file = " ../../data/tsne_mnist_output_golden.txt" ;
769+ success = verify (golden_file, opt.get_dump_file (), 0.2 , 10.0 );
726770 TIMER_END_ ()
727771
728772 sycl::free (host_ys, qts);
0 commit comments