@@ -590,7 +590,7 @@ TEST_F(TestEnvMatA, prod_cpu_equal_cpu) {
   // }
 }
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 TEST_F(TestEnvMatA, prod_gpu) {
   EXPECT_EQ(nlist_r_cpy.size(), nloc);
   int tot_nnei = 0;
@@ -782,198 +782,4 @@ TEST_F(TestEnvMatA, prod_gpu_equal_cpu) {
     }
   }
 }
-#endif // GOOGLE_CUDA
-
-#if TENSORFLOW_USE_ROCM
-TEST_F(TestEnvMatA, prod_gpu) {
-  EXPECT_EQ(nlist_r_cpy.size(), nloc);
-  int tot_nnei = 0;
-  int max_nbor_size = 0;
-  for (int ii = 0; ii < nlist_a_cpy.size(); ++ii) {
-    tot_nnei += nlist_a_cpy[ii].size();
-    if (nlist_a_cpy[ii].size() > max_nbor_size) {
-      max_nbor_size = nlist_a_cpy[ii].size();
-    }
-  }
-  assert(max_nbor_size <= GPU_MAX_NBOR_SIZE);
-  if (max_nbor_size <= 1024) {
-    max_nbor_size = 1024;
-  } else if (max_nbor_size <= 2048) {
-    max_nbor_size = 2048;
-  } else {
-    max_nbor_size = 4096;
-  }
-  std::vector<int> ilist(nloc), numneigh(nloc);
-  std::vector<int *> firstneigh(nloc);
-  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]),
-      gpu_inlist;
-  convert_nlist(inlist, nlist_a_cpy);
-  std::vector<double> em(nloc * ndescrpt, 0.0),
-      em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0);
-  std::vector<int> nlist(nloc * nnei, 0);
-  std::vector<double> avg(ntypes * ndescrpt, 0);
-  std::vector<double> std(ntypes * ndescrpt, 1);
-
-  double *em_dev = NULL, *em_deriv_dev = NULL, *rij_dev = NULL;
-  double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL;
-  int *atype_cpy_dev = NULL, *nlist_dev = NULL, *array_int_dev = NULL,
-      *memory_dev = NULL;
-  uint_64 *array_longlong_dev = NULL;
-  deepmd::malloc_device_memory_sync(em_dev, em);
-  deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv);
-  deepmd::malloc_device_memory_sync(rij_dev, rij);
-  deepmd::malloc_device_memory_sync(posi_cpy_dev, posi_cpy);
-  deepmd::malloc_device_memory_sync(avg_dev, avg);
-  deepmd::malloc_device_memory_sync(std_dev, std);
-  deepmd::malloc_device_memory_sync(atype_cpy_dev, atype_cpy);
-  deepmd::malloc_device_memory_sync(nlist_dev, nlist);
-  deepmd::malloc_device_memory(array_int_dev,
-                               sec_a.size() + nloc * sec_a.size() + nloc);
-  deepmd::malloc_device_memory(array_longlong_dev,
-                               nloc * GPU_MAX_NBOR_SIZE * 2);
-  deepmd::malloc_device_memory(memory_dev, nloc * max_nbor_size);
-  deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev,
-                                   max_nbor_size);
-
-  deepmd::prod_env_mat_a_gpu(em_dev, em_deriv_dev, rij_dev, nlist_dev,
-                             posi_cpy_dev, atype_cpy_dev, gpu_inlist,
-                             array_int_dev, array_longlong_dev, max_nbor_size,
-                             avg_dev, std_dev, nloc, nall, rc, rc_smth, sec_a);
-  deepmd::memcpy_device_to_host(em_dev, em);
-  deepmd::delete_device_memory(em_dev);
-  deepmd::delete_device_memory(em_deriv_dev);
-  deepmd::delete_device_memory(nlist_dev);
-  deepmd::delete_device_memory(posi_cpy_dev);
-  deepmd::delete_device_memory(atype_cpy_dev);
-  deepmd::delete_device_memory(array_int_dev);
-  deepmd::delete_device_memory(array_longlong_dev);
-  deepmd::delete_device_memory(avg_dev);
-  deepmd::delete_device_memory(std_dev);
-  deepmd::delete_device_memory(memory_dev);
-  deepmd::free_nlist_gpu_device(gpu_inlist);
-
-  for (int ii = 0; ii < nloc; ++ii) {
-    for (int jj = 0; jj < nnei; ++jj) {
-      for (int dd = 0; dd < 4; ++dd) {
-        EXPECT_LT(fabs(em[ii * nnei * 4 + jj * 4 + dd] -
-                       expected_env[ii * nnei * 4 + jj * 4 + dd]),
-                  1e-5);
-      }
-    }
-  }
-}
-
-TEST_F(TestEnvMatA, prod_gpu_equal_cpu) {
-  EXPECT_EQ(nlist_r_cpy.size(), nloc);
-  int tot_nnei = 0;
-  int max_nbor_size = 0;
-  for (int ii = 0; ii < nlist_a_cpy.size(); ++ii) {
-    tot_nnei += nlist_a_cpy[ii].size();
-    if (nlist_a_cpy[ii].size() > max_nbor_size) {
-      max_nbor_size = nlist_a_cpy[ii].size();
-    }
-  }
-  assert(max_nbor_size <= GPU_MAX_NBOR_SIZE);
-  if (max_nbor_size <= 1024) {
-    max_nbor_size = 1024;
-  } else if (max_nbor_size <= 2048) {
-    max_nbor_size = 2048;
-  } else {
-    max_nbor_size = 4096;
-  }
-  std::vector<int> ilist(nloc), numneigh(nloc);
-  std::vector<int *> firstneigh(nloc);
-  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]),
-      gpu_inlist;
-  convert_nlist(inlist, nlist_a_cpy);
-  std::vector<double> em(nloc * ndescrpt, 0.0),
-      em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0);
-  std::vector<int> nlist(nloc * nnei, 0);
-  std::vector<double> avg(ntypes * ndescrpt, 0);
-  std::vector<double> std(ntypes * ndescrpt, 1);
-
-  double *em_dev = NULL, *em_deriv_dev = NULL, *rij_dev = NULL;
-  double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL;
-  int *atype_cpy_dev = NULL, *nlist_dev = NULL, *array_int_dev = NULL,
-      *memory_dev = NULL;
-  uint_64 *array_longlong_dev = NULL;
-  deepmd::malloc_device_memory_sync(em_dev, em);
-  deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv);
-  deepmd::malloc_device_memory_sync(rij_dev, rij);
-  deepmd::malloc_device_memory_sync(posi_cpy_dev, posi_cpy);
-  deepmd::malloc_device_memory_sync(avg_dev, avg);
-  deepmd::malloc_device_memory_sync(std_dev, std);
-
-  deepmd::malloc_device_memory_sync(atype_cpy_dev, atype_cpy);
-  deepmd::malloc_device_memory_sync(nlist_dev, nlist);
-  deepmd::malloc_device_memory(array_int_dev,
-                               sec_a.size() + nloc * sec_a.size() + nloc);
-  deepmd::malloc_device_memory(array_longlong_dev,
-                               nloc * GPU_MAX_NBOR_SIZE * 2);
-  deepmd::malloc_device_memory(memory_dev, nloc * max_nbor_size);
-  deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev,
-                                   max_nbor_size);
-
-  deepmd::prod_env_mat_a_gpu(em_dev, em_deriv_dev, rij_dev, nlist_dev,
-                             posi_cpy_dev, atype_cpy_dev, gpu_inlist,
-                             array_int_dev, array_longlong_dev, max_nbor_size,
-                             avg_dev, std_dev, nloc, nall, rc, rc_smth, sec_a);
-  deepmd::memcpy_device_to_host(em_dev, em);
-  deepmd::memcpy_device_to_host(em_deriv_dev, em_deriv);
-  deepmd::memcpy_device_to_host(rij_dev, rij);
-  deepmd::memcpy_device_to_host(nlist_dev, nlist);
-  deepmd::delete_device_memory(em_dev);
-  deepmd::delete_device_memory(em_deriv_dev);
-  deepmd::delete_device_memory(nlist_dev);
-  deepmd::delete_device_memory(posi_cpy_dev);
-  deepmd::delete_device_memory(atype_cpy_dev);
-  deepmd::delete_device_memory(array_int_dev);
-  deepmd::delete_device_memory(array_longlong_dev);
-  deepmd::delete_device_memory(avg_dev);
-  deepmd::delete_device_memory(std_dev);
-  deepmd::delete_device_memory(memory_dev);
-  deepmd::free_nlist_gpu_device(gpu_inlist);
-
-  std::vector<int> fmt_nlist_a_1, fmt_nlist_r_1;
-  std::vector<double> env_1, env_deriv_1, rij_a_1;
-  for (int ii = 0; ii < nloc; ++ii) {
-    int ret_1 = format_nlist_i_cpu<double>(fmt_nlist_a_1, posi_cpy, atype_cpy,
-                                           ii, nlist_a_cpy[ii], rc, sec_a);
-    EXPECT_EQ(ret_1, -1);
-    deepmd::env_mat_a_cpu<double>(env_1, env_deriv_1, rij_a_1, posi_cpy,
-                                  atype_cpy, ii, fmt_nlist_a_1, sec_a, rc_smth,
-                                  rc);
-    EXPECT_EQ(env_1.size(), nnei * 4);
-    EXPECT_EQ(env_deriv_1.size(), nnei * 4 * 3);
-    EXPECT_EQ(rij_a_1.size(), nnei * 3);
-    EXPECT_EQ(fmt_nlist_a_1.size(), nnei);
-    EXPECT_EQ(env_1.size() * nloc, em.size());
-    EXPECT_EQ(env_deriv_1.size() * nloc, em_deriv.size());
-    EXPECT_EQ(rij_a_1.size() * nloc, rij.size());
-    EXPECT_EQ(fmt_nlist_a_1.size() * nloc, nlist.size());
-    for (unsigned jj = 0; jj < env_1.size(); ++jj) {
-      EXPECT_LT(fabs(em[ii * nnei * 4 + jj] - env_1[jj]), 1e-10);
-    }
-    for (unsigned jj = 0; jj < env_deriv_1.size(); ++jj) {
-      EXPECT_LT(fabs(em_deriv[ii * nnei * 4 * 3 + jj] - env_deriv_1[jj]),
-                1e-10);
-    }
-    for (unsigned jj = 0; jj < rij_a_1.size(); ++jj) {
-      EXPECT_LT(fabs(rij[ii * nnei * 3 + jj] - rij_a_1[jj]), 1e-10);
-    }
-    for (unsigned jj = 0; jj < fmt_nlist_a_1.size(); ++jj) {
-      EXPECT_EQ(nlist[ii * nnei + jj], fmt_nlist_a_1[jj]);
-    }
-  }
-
-  for (int ii = 0; ii < nloc; ++ii) {
-    for (int jj = 0; jj < nnei; ++jj) {
-      for (int dd = 0; dd < 4; ++dd) {
-        EXPECT_LT(fabs(em[ii * nnei * 4 + jj * 4 + dd] -
-                       expected_env[ii * nnei * 4 + jj * 4 + dd]),
-                  1e-5);
-      }
-    }
-  }
-}
-#endif // TENSORFLOW_USE_ROCM
+#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
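
// Illustrative sketch, not part of the patch: the neighbor-buffer padding that
// both GPU tests above perform before allocating device memory, pulled out into
// a standalone helper. The 1024/2048/4096 buckets mirror the test logic; the
// helper name and the hard-coded 4096 bound (GPU_MAX_NBOR_SIZE in the tests)
// are assumptions made for this example.
#include <cassert>
#include <iostream>
#include <vector>

static int padded_nbor_size(const std::vector<std::vector<int> >& nlist_a_cpy) {
  // Find the largest per-atom neighbor count.
  int max_nbor_size = 0;
  for (size_t ii = 0; ii < nlist_a_cpy.size(); ++ii) {
    if (static_cast<int>(nlist_a_cpy[ii].size()) > max_nbor_size) {
      max_nbor_size = static_cast<int>(nlist_a_cpy[ii].size());
    }
  }
  assert(max_nbor_size <= 4096);
  // Round up to the fixed GPU buffer sizes used by the tests.
  if (max_nbor_size <= 1024) {
    return 1024;
  } else if (max_nbor_size <= 2048) {
    return 2048;
  }
  return 4096;
}

int main() {
  std::vector<std::vector<int> > nlist_a_cpy = {{1, 2, 3}, {0, 2}, {0, 1}};
  std::cout << padded_nbor_size(nlist_a_cpy) << std::endl;  // prints 1024
}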