
Commit fa2c0b6

merge CUDA and ROCm codes in tests (#2846)
Signed-off-by: Jinzhe Zeng <[email protected]>
1 parent 338018c commit fa2c0b6

19 files changed: +41 -1894 lines changed
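
The commit's pattern throughout these test files is to collapse duplicated CUDA and ROCm test bodies into a single body behind a combined preprocessor guard. Below is a minimal sketch of that pattern, assuming GoogleTest is available as in the real tests; the test name and body are illustrative only and not taken from the repository.

#include <gtest/gtest.h>

#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
// One guarded body now covers both GPU backends; previously the same body was
// duplicated once under #if GOOGLE_CUDA and again under #if TENSORFLOW_USE_ROCM.
TEST(ExampleGpuGuard, single_body_for_both_backends) {
  EXPECT_TRUE(true);  // placeholder assertion
}
#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

int main(int argc, char **argv) {
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}

If neither macro is defined (or both expand to zero), the preprocessor treats the condition as false and the guarded test is compiled out, which is how a CPU-only build skips the GPU tests.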

source/lib/tests/test_coord.cc

Lines changed: 5 additions & 370 deletions
Large diffs are not rendered by default.

source/lib/tests/test_env_mat_a.cc

Lines changed: 2 additions & 196 deletions
@@ -590,7 +590,7 @@ TEST_F(TestEnvMatA, prod_cpu_equal_cpu) {
   // }
 }
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 TEST_F(TestEnvMatA, prod_gpu) {
   EXPECT_EQ(nlist_r_cpy.size(), nloc);
   int tot_nnei = 0;
@@ -782,198 +782,4 @@ TEST_F(TestEnvMatA, prod_gpu_equal_cpu) {
     }
   }
 }
-#endif // GOOGLE_CUDA
-
-#if TENSORFLOW_USE_ROCM
-TEST_F(TestEnvMatA, prod_gpu) {
-  EXPECT_EQ(nlist_r_cpy.size(), nloc);
-  int tot_nnei = 0;
-  int max_nbor_size = 0;
-  for (int ii = 0; ii < nlist_a_cpy.size(); ++ii) {
-    tot_nnei += nlist_a_cpy[ii].size();
-    if (nlist_a_cpy[ii].size() > max_nbor_size) {
-      max_nbor_size = nlist_a_cpy[ii].size();
-    }
-  }
-  assert(max_nbor_size <= GPU_MAX_NBOR_SIZE);
-  if (max_nbor_size <= 1024) {
-    max_nbor_size = 1024;
-  } else if (max_nbor_size <= 2048) {
-    max_nbor_size = 2048;
-  } else {
-    max_nbor_size = 4096;
-  }
-  std::vector<int> ilist(nloc), numneigh(nloc);
-  std::vector<int *> firstneigh(nloc);
-  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]),
-      gpu_inlist;
-  convert_nlist(inlist, nlist_a_cpy);
-  std::vector<double> em(nloc * ndescrpt, 0.0),
-      em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0);
-  std::vector<int> nlist(nloc * nnei, 0);
-  std::vector<double> avg(ntypes * ndescrpt, 0);
-  std::vector<double> std(ntypes * ndescrpt, 1);
-
-  double *em_dev = NULL, *em_deriv_dev = NULL, *rij_dev = NULL;
-  double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL;
-  int *atype_cpy_dev = NULL, *nlist_dev = NULL, *array_int_dev = NULL,
-      *memory_dev = NULL;
-  uint_64 *array_longlong_dev = NULL;
-  deepmd::malloc_device_memory_sync(em_dev, em);
-  deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv);
-  deepmd::malloc_device_memory_sync(rij_dev, rij);
-  deepmd::malloc_device_memory_sync(posi_cpy_dev, posi_cpy);
-  deepmd::malloc_device_memory_sync(avg_dev, avg);
-  deepmd::malloc_device_memory_sync(std_dev, std);
-  deepmd::malloc_device_memory_sync(atype_cpy_dev, atype_cpy);
-  deepmd::malloc_device_memory_sync(nlist_dev, nlist);
-  deepmd::malloc_device_memory(array_int_dev,
-      sec_a.size() + nloc * sec_a.size() + nloc);
-  deepmd::malloc_device_memory(array_longlong_dev,
-      nloc * GPU_MAX_NBOR_SIZE * 2);
-  deepmd::malloc_device_memory(memory_dev, nloc * max_nbor_size);
-  deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev,
-      max_nbor_size);
-
-  deepmd::prod_env_mat_a_gpu(em_dev, em_deriv_dev, rij_dev, nlist_dev,
-      posi_cpy_dev, atype_cpy_dev, gpu_inlist,
-      array_int_dev, array_longlong_dev, max_nbor_size,
-      avg_dev, std_dev, nloc, nall, rc, rc_smth, sec_a);
-  deepmd::memcpy_device_to_host(em_dev, em);
-  deepmd::delete_device_memory(em_dev);
-  deepmd::delete_device_memory(em_deriv_dev);
-  deepmd::delete_device_memory(nlist_dev);
-  deepmd::delete_device_memory(posi_cpy_dev);
-  deepmd::delete_device_memory(atype_cpy_dev);
-  deepmd::delete_device_memory(array_int_dev);
-  deepmd::delete_device_memory(array_longlong_dev);
-  deepmd::delete_device_memory(avg_dev);
-  deepmd::delete_device_memory(std_dev);
-  deepmd::delete_device_memory(memory_dev);
-  deepmd::free_nlist_gpu_device(gpu_inlist);
-
-  for (int ii = 0; ii < nloc; ++ii) {
-    for (int jj = 0; jj < nnei; ++jj) {
-      for (int dd = 0; dd < 4; ++dd) {
-        EXPECT_LT(fabs(em[ii * nnei * 4 + jj * 4 + dd] -
-            expected_env[ii * nnei * 4 + jj * 4 + dd]),
-            1e-5);
-      }
-    }
-  }
-}
-
-TEST_F(TestEnvMatA, prod_gpu_equal_cpu) {
-  EXPECT_EQ(nlist_r_cpy.size(), nloc);
-  int tot_nnei = 0;
-  int max_nbor_size = 0;
-  for (int ii = 0; ii < nlist_a_cpy.size(); ++ii) {
-    tot_nnei += nlist_a_cpy[ii].size();
-    if (nlist_a_cpy[ii].size() > max_nbor_size) {
-      max_nbor_size = nlist_a_cpy[ii].size();
-    }
-  }
-  assert(max_nbor_size <= GPU_MAX_NBOR_SIZE);
-  if (max_nbor_size <= 1024) {
-    max_nbor_size = 1024;
-  } else if (max_nbor_size <= 2048) {
-    max_nbor_size = 2048;
-  } else {
-    max_nbor_size = 4096;
-  }
-  std::vector<int> ilist(nloc), numneigh(nloc);
-  std::vector<int *> firstneigh(nloc);
-  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]),
-      gpu_inlist;
-  convert_nlist(inlist, nlist_a_cpy);
-  std::vector<double> em(nloc * ndescrpt, 0.0),
-      em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0);
-  std::vector<int> nlist(nloc * nnei, 0);
-  std::vector<double> avg(ntypes * ndescrpt, 0);
-  std::vector<double> std(ntypes * ndescrpt, 1);
-
-  double *em_dev = NULL, *em_deriv_dev = NULL, *rij_dev = NULL;
-  double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL;
-  int *atype_cpy_dev = NULL, *nlist_dev = NULL, *array_int_dev = NULL,
-      *memory_dev = NULL;
-  uint_64 *array_longlong_dev = NULL;
-  deepmd::malloc_device_memory_sync(em_dev, em);
-  deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv);
-  deepmd::malloc_device_memory_sync(rij_dev, rij);
-  deepmd::malloc_device_memory_sync(posi_cpy_dev, posi_cpy);
-  deepmd::malloc_device_memory_sync(avg_dev, avg);
-  deepmd::malloc_device_memory_sync(std_dev, std);
-
-  deepmd::malloc_device_memory_sync(atype_cpy_dev, atype_cpy);
-  deepmd::malloc_device_memory_sync(nlist_dev, nlist);
-  deepmd::malloc_device_memory(array_int_dev,
-      sec_a.size() + nloc * sec_a.size() + nloc);
-  deepmd::malloc_device_memory(array_longlong_dev,
-      nloc * GPU_MAX_NBOR_SIZE * 2);
-  deepmd::malloc_device_memory(memory_dev, nloc * max_nbor_size);
-  deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev,
-      max_nbor_size);
-
-  deepmd::prod_env_mat_a_gpu(em_dev, em_deriv_dev, rij_dev, nlist_dev,
-      posi_cpy_dev, atype_cpy_dev, gpu_inlist,
-      array_int_dev, array_longlong_dev, max_nbor_size,
-      avg_dev, std_dev, nloc, nall, rc, rc_smth, sec_a);
-  deepmd::memcpy_device_to_host(em_dev, em);
-  deepmd::memcpy_device_to_host(em_deriv_dev, em_deriv);
-  deepmd::memcpy_device_to_host(rij_dev, rij);
-  deepmd::memcpy_device_to_host(nlist_dev, nlist);
-  deepmd::delete_device_memory(em_dev);
-  deepmd::delete_device_memory(em_deriv_dev);
-  deepmd::delete_device_memory(nlist_dev);
-  deepmd::delete_device_memory(posi_cpy_dev);
-  deepmd::delete_device_memory(atype_cpy_dev);
-  deepmd::delete_device_memory(array_int_dev);
-  deepmd::delete_device_memory(array_longlong_dev);
-  deepmd::delete_device_memory(avg_dev);
-  deepmd::delete_device_memory(std_dev);
-  deepmd::delete_device_memory(memory_dev);
-  deepmd::free_nlist_gpu_device(gpu_inlist);
-
-  std::vector<int> fmt_nlist_a_1, fmt_nlist_r_1;
-  std::vector<double> env_1, env_deriv_1, rij_a_1;
-  for (int ii = 0; ii < nloc; ++ii) {
-    int ret_1 = format_nlist_i_cpu<double>(fmt_nlist_a_1, posi_cpy, atype_cpy,
-        ii, nlist_a_cpy[ii], rc, sec_a);
-    EXPECT_EQ(ret_1, -1);
-    deepmd::env_mat_a_cpu<double>(env_1, env_deriv_1, rij_a_1, posi_cpy,
-        atype_cpy, ii, fmt_nlist_a_1, sec_a, rc_smth,
-        rc);
-    EXPECT_EQ(env_1.size(), nnei * 4);
-    EXPECT_EQ(env_deriv_1.size(), nnei * 4 * 3);
-    EXPECT_EQ(rij_a_1.size(), nnei * 3);
-    EXPECT_EQ(fmt_nlist_a_1.size(), nnei);
-    EXPECT_EQ(env_1.size() * nloc, em.size());
-    EXPECT_EQ(env_deriv_1.size() * nloc, em_deriv.size());
-    EXPECT_EQ(rij_a_1.size() * nloc, rij.size());
-    EXPECT_EQ(fmt_nlist_a_1.size() * nloc, nlist.size());
-    for (unsigned jj = 0; jj < env_1.size(); ++jj) {
-      EXPECT_LT(fabs(em[ii * nnei * 4 + jj] - env_1[jj]), 1e-10);
-    }
-    for (unsigned jj = 0; jj < env_deriv_1.size(); ++jj) {
-      EXPECT_LT(fabs(em_deriv[ii * nnei * 4 * 3 + jj] - env_deriv_1[jj]),
-          1e-10);
-    }
-    for (unsigned jj = 0; jj < rij_a_1.size(); ++jj) {
-      EXPECT_LT(fabs(rij[ii * nnei * 3 + jj] - rij_a_1[jj]), 1e-10);
-    }
-    for (unsigned jj = 0; jj < fmt_nlist_a_1.size(); ++jj) {
-      EXPECT_EQ(nlist[ii * nnei + jj], fmt_nlist_a_1[jj]);
-    }
-  }
-
-  for (int ii = 0; ii < nloc; ++ii) {
-    for (int jj = 0; jj < nnei; ++jj) {
-      for (int dd = 0; dd < 4; ++dd) {
-        EXPECT_LT(fabs(em[ii * nnei * 4 + jj * 4 + dd] -
-            expected_env[ii * nnei * 4 + jj * 4 + dd]),
-            1e-5);
-      }
-    }
-  }
-}
-#endif // TENSORFLOW_USE_ROCM
+#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
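
Both the removed ROCm copy above and the surviving guarded body follow the same upload, compute, download, free, and compare structure. Below is a host-only sketch of that structure; fake_malloc_sync, fake_kernel, and fake_copy_back are hypothetical stand-ins for deepmd::malloc_device_memory_sync, deepmd::prod_env_mat_a_gpu, and deepmd::memcpy_device_to_host, so the sketch builds and runs without a GPU.

#include <cassert>
#include <cmath>
#include <cstddef>
#include <vector>

// Hypothetical stand-in for deepmd::malloc_device_memory_sync: "device"
// memory is modeled as a host-side copy of the input buffer.
template <typename T>
std::vector<T> fake_malloc_sync(const std::vector<T> &host) {
  return host;
}

// Hypothetical stand-in for the GPU kernel (deepmd::prod_env_mat_a_gpu in the
// real test): doubles every element so there is something to verify.
void fake_kernel(std::vector<double> &dev) {
  for (double &x : dev) {
    x *= 2.0;
  }
}

// Hypothetical stand-in for deepmd::memcpy_device_to_host.
void fake_copy_back(const std::vector<double> &dev, std::vector<double> &host) {
  host = dev;
}

int main() {
  std::vector<double> em(8, 1.0);        // host output buffer, as in the test
  std::vector<double> expected(8, 2.0);  // reference result

  std::vector<double> em_dev = fake_malloc_sync(em);  // upload
  fake_kernel(em_dev);                                // compute on the "device"
  fake_copy_back(em_dev, em);                         // download
  // In the real test the device buffers are then released with
  // deepmd::delete_device_memory and deepmd::free_nlist_gpu_device.

  for (std::size_t i = 0; i < em.size(); ++i) {
    assert(std::fabs(em[i] - expected[i]) < 1e-10);  // EXPECT_LT in gtest
  }
  return 0;
}

In the real prod_gpu_equal_cpu test the downloaded buffers are compared element by element against the CPU reference produced by deepmd::env_mat_a_cpu, with a 1e-10 tolerance.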
