 #include "SimulationRegion.h"
 #include <stdexcept>
 
-#define MAGIC_NUMBER 1024
 
 #ifdef USE_CUDA_TOOLKIT
 #include "cuda_runtime.h"
 #define cudaErrcheck(res) { cudaAssert((res), __FILE__, __LINE__); }
 inline void cudaAssert(cudaError_t code, const char *file, int line, bool abort=true)
 {
-  if (code != cudaSuccess)
+  if (code != cudaSuccess)
   {
     fprintf(stderr, "cuda assert: %s %s %d\n", cudaGetErrorString(code), file, line);
     if (abort) exit(code);
@@ -273,34 +272,19 @@ NNPInter::~NNPInter() {
     cudaErrcheck(cudaFree(ilist));
     cudaErrcheck(cudaFree(jrange));
     cudaErrcheck(cudaFree(jlist));
-    cudaErrcheck(cudaFree(array_int));
-    cudaErrcheck(cudaFree(array_longlong));
-    cudaErrcheck(cudaFree(array_double));
   }
 #endif
 }
 
 #ifdef USE_CUDA_TOOLKIT
 void NNPInter::update_nbor(const InternalNeighborList & nlist, const int nloc) {
   if (!init_nbor) {
-    sec_a = cum_sum(get_sel_a());
     cudaErrcheck(cudaMalloc((void**)&ilist, sizeof(int) * nlist.ilist.size()));
     cudaErrcheck(cudaMalloc((void**)&jrange, sizeof(int) * nlist.jrange.size()));
     cudaErrcheck(cudaMalloc((void**)&jlist, sizeof(int) * nlist.jlist.size()));
-    cudaErrcheck(cudaMalloc((void**)&array_int, sizeof(int) * (sec_a.size() + nloc * sec_a.size() + nloc)));
-    cudaErrcheck(cudaMalloc((void**)&array_longlong, sizeof(unsigned long long) * nloc * MAGIC_NUMBER * 2));
-#ifdef HIGH_PREC
-    cudaErrcheck(cudaMalloc((void**)&array_double, sizeof(compute_t) * nloc * sec_a.back() * 3));
-#else
-    cudaErrcheck(cudaMalloc((void**)&array_double, sizeof(compute_t) * nloc * sec_a.back() * 3));
-#endif
     ilist_size = nlist.ilist.size();
     jrange_size = nlist.jrange.size();
     jlist_size = nlist.jlist.size();
-    arr_int_size = sec_a.size() + nloc * sec_a.size() + nloc;
-    arr_ll_size = nloc * MAGIC_NUMBER * 2;
-    arr_dou_size = nloc * sec_a.back() * 3;
-    init_nbor = true;
   }
   if (ilist_size < nlist.ilist.size()) {
     cudaErrcheck(cudaFree(ilist));
@@ -317,25 +301,7 @@ void NNPInter::update_nbor(const InternalNeighborList & nlist, const int nloc) {
     cudaErrcheck(cudaMalloc((void**)&jlist, sizeof(int) * nlist.jlist.size()));
     jlist_size = nlist.jlist.size();
   }
-  if (arr_int_size < sec_a.size() + nloc * sec_a.size() + nloc) {
-    cudaErrcheck(cudaFree(array_int));
-    cudaErrcheck(cudaMalloc((void**)&array_int, sizeof(int) * (sec_a.size() + nloc * sec_a.size() + nloc)));
-    arr_int_size = sec_a.size() + nloc * sec_a.size() + nloc;
-  }
-  if (arr_ll_size < nloc * MAGIC_NUMBER * 2) {
-    cudaErrcheck(cudaFree(array_longlong));
-    cudaErrcheck(cudaMalloc((void**)&array_longlong, sizeof(unsigned long long) * nloc * MAGIC_NUMBER * 2));
-    arr_ll_size = nloc * MAGIC_NUMBER * 2;
-  }
-  if (arr_dou_size < nloc * sec_a.back() * 3) {
-    cudaErrcheck(cudaFree(array_double));
-#ifdef HIGH_PREC
-    cudaErrcheck(cudaMalloc((void**)&array_double, sizeof(compute_t) * nloc * sec_a.back() * 3));
-#else
-    cudaErrcheck(cudaMalloc((void**)&array_double, sizeof(compute_t) * nloc * sec_a.back() * 3));
-#endif
-    arr_dou_size = nloc * sec_a.back() * 3;
-  }
+
   cudaErrcheck(cudaMemcpy(ilist, &nlist.ilist[0], sizeof(int) * nlist.ilist.size(), cudaMemcpyHostToDevice));
   cudaErrcheck(cudaMemcpy(jrange, &nlist.jrange[0], sizeof(int) * nlist.jrange.size(), cudaMemcpyHostToDevice));
   cudaErrcheck(cudaMemcpy(jlist, &nlist.jlist[0], sizeof(int) * nlist.jlist.size(), cudaMemcpyHostToDevice));
@@ -378,14 +344,10 @@ init (const string & model, const int & gpu_rank)
   if (dfparam < 0) dfparam = 0;
   if (daparam < 0) daparam = 0;
   inited = true;
-
+
   init_nbor = false;
-  array_int = NULL;
-  array_double = NULL;
-  array_longlong = NULL;
   ilist = NULL; jrange = NULL; jlist = NULL;
   ilist_size = 0; jrange_size = 0; jlist_size = 0;
-  arr_int_size = 0; arr_ll_size = 0; arr_dou_size = 0;
 }
 #else
 void
@@ -415,12 +377,8 @@ init (const string & model, const int & gpu_rank)
   inited = true;
 
   init_nbor = false;
-  array_int = NULL;
-  array_double = NULL;
-  array_longlong = NULL;
   ilist = NULL; jrange = NULL; jlist = NULL;
   ilist_size = 0; jrange_size = 0; jlist_size = 0;
-  arr_int_size = 0; arr_ll_size = 0; arr_dou_size = 0;
 }
 #endif
 
@@ -602,7 +560,7 @@ compute_inner (ENERGYTYPE & dener,
   }
 
 #ifdef USE_CUDA_TOOLKIT
-  int ret = session_input_tensors(input_tensors, dcoord_, ntypes, datype_, dbox, ilist, jrange, jlist, array_int, array_longlong, array_double, fparam, aparam, nnpmap, nghost);
+  int ret = session_input_tensors(input_tensors, dcoord_, ntypes, datype_, dbox, ilist, jrange, jlist, fparam, aparam, nnpmap, nghost);
 #else
   int ret = session_input_tensors(input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam, nnpmap, nghost);
 #endif
@@ -669,7 +627,7 @@ compute (ENERGYTYPE & dener,
   }
 
 #ifdef USE_CUDA_TOOLKIT
-  int ret = session_input_tensors(input_tensors, dcoord_, ntypes, datype_, dbox, ilist, jrange, jlist, array_int, array_longlong, array_double, fparam, aparam, nnpmap, nghost);
+  int ret = session_input_tensors(input_tensors, dcoord_, ntypes, datype_, dbox, ilist, jrange, jlist, fparam, aparam, nnpmap, nghost);
 #else
   int ret = session_input_tensors(input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam, nnpmap, nghost);
 #endif
@@ -710,9 +668,6 @@ NNPInterModelDevi::~NNPInterModelDevi() {
     cudaErrcheck(cudaFree(ilist));
     cudaErrcheck(cudaFree(jrange));
     cudaErrcheck(cudaFree(jlist));
-    cudaErrcheck(cudaFree(array_int));
-    cudaErrcheck(cudaFree(array_longlong));
-    cudaErrcheck(cudaFree(array_double));
   }
 #endif
 }
@@ -761,14 +716,10 @@ init (const vector<string> & models, const int & gpu_rank)
   // cell_size = rcut;
   // ntypes = get_ntypes();
   inited = true;
-
+
   init_nbor = false;
-  array_int = NULL;
-  array_double = NULL;
-  array_longlong = NULL;
   ilist = NULL; jrange = NULL; jlist = NULL;
   ilist_size = 0; jrange_size = 0; jlist_size = 0;
-  arr_int_size = 0; arr_ll_size = 0; arr_dou_size = 0;
 }
 #else
 void
@@ -798,14 +749,10 @@ init (const vector<string> & models, const int & gpu_rank)
   // cell_size = rcut;
   // ntypes = get_ntypes();
   inited = true;
-
+
   init_nbor = false;
-  array_int = NULL;
-  array_double = NULL;
-  array_longlong = NULL;
   ilist = NULL; jrange = NULL; jlist = NULL;
   ilist_size = 0; jrange_size = 0; jlist_size = 0;
-  arr_int_size = 0; arr_ll_size = 0; arr_dou_size = 0;
 }
 #endif
 
@@ -873,41 +820,18 @@ cum_sum (const std::vector<std::vector<int32> > n_sel)
   }
 }
 
-void
-NNPInterModelDevi::
-get_max_sec()
-{
-  for (int ii = 0; ii < numb_models; ii++) {
-    this->max_sec_size = max_sec_size < sec[ii].size() ? sec[ii].size() : max_sec_size;
-    this->max_sec_back = max_sec_back < sec[ii].back() ? sec[ii].back() : max_sec_back;
-  }
-}
-
 #ifdef USE_CUDA_TOOLKIT
 void
 NNPInterModelDevi::
 update_nbor(const InternalNeighborList & nlist, const int nloc)
 {
   if (!init_nbor) {
-    cum_sum(get_sel());
-    get_max_sec();
     cudaErrcheck(cudaMalloc((void**)&ilist, sizeof(int) * nlist.ilist.size()));
     cudaErrcheck(cudaMalloc((void**)&jrange, sizeof(int) * nlist.jrange.size()));
     cudaErrcheck(cudaMalloc((void**)&jlist, sizeof(int) * nlist.jlist.size()));
-    cudaErrcheck(cudaMalloc((void**)&array_int, sizeof(int) * (max_sec_size + nloc * max_sec_size + nloc)));
-    cudaErrcheck(cudaMalloc((void**)&array_longlong, sizeof(unsigned long long) * nloc * MAGIC_NUMBER * 2));
-#ifdef HIGH_PREC
-    cudaErrcheck(cudaMalloc((void**)&array_double, sizeof(compute_t) * nloc * max_sec_back * 3));
-#else
-    cudaErrcheck(cudaMalloc((void**)&array_double, sizeof(compute_t) * nloc * max_sec_back * 3));
-#endif
     ilist_size = nlist.ilist.size();
     jrange_size = nlist.jrange.size();
     jlist_size = nlist.jlist.size();
-    arr_int_size = max_sec_size + nloc * max_sec_size + nloc;
-    arr_ll_size = nloc * MAGIC_NUMBER * 2;
-    arr_dou_size = nloc * max_sec_back * 3;
-    init_nbor = true;
   }
   if (ilist_size < nlist.ilist.size()) {
     cudaErrcheck(cudaFree(ilist));
@@ -924,25 +848,7 @@ update_nbor(const InternalNeighborList & nlist, const int nloc)
     cudaErrcheck(cudaMalloc((void**)&jlist, sizeof(int) * nlist.jlist.size()));
     jlist_size = nlist.jlist.size();
   }
-  if (arr_int_size < max_sec_size + nloc * max_sec_size + nloc) {
-    cudaErrcheck(cudaFree(array_int));
-    cudaErrcheck(cudaMalloc((void**)&array_int, sizeof(int) * (max_sec_size + nloc * max_sec_size + nloc)));
-    arr_int_size = max_sec_size + nloc * max_sec_size + nloc;
-  }
-  if (arr_ll_size < nloc * MAGIC_NUMBER * 2) {
-    cudaErrcheck(cudaFree(array_longlong));
-    cudaErrcheck(cudaMalloc((void**)&array_longlong, sizeof(unsigned long long) * nloc * MAGIC_NUMBER * 2));
-    arr_ll_size = nloc * MAGIC_NUMBER * 2;
-  }
-  if (arr_dou_size < nloc * max_sec_back * 3) {
-    cudaErrcheck(cudaFree(array_double));
-#ifdef HIGH_PREC
-    cudaErrcheck(cudaMalloc((void**)&array_double, sizeof(compute_t) * nloc * max_sec_back * 3));
-#else
-    cudaErrcheck(cudaMalloc((void**)&array_double, sizeof(compute_t) * nloc * max_sec_back * 3));
-#endif
-    arr_dou_size = nloc * max_sec_back * 3;
-  }
+
   cudaErrcheck(cudaMemcpy(ilist, &nlist.ilist[0], sizeof(int) * nlist.ilist.size(), cudaMemcpyHostToDevice));
   cudaErrcheck(cudaMemcpy(jrange, &nlist.jrange[0], sizeof(int) * nlist.jrange.size(), cudaMemcpyHostToDevice));
   cudaErrcheck(cudaMemcpy(jlist, &nlist.jlist[0], sizeof(int) * nlist.jlist.size(), cudaMemcpyHostToDevice));
@@ -1044,7 +950,7 @@ compute (vector<ENERGYTYPE> & all_energy,
 
   }
 #ifdef USE_CUDA_TOOLKIT
-  int ret = session_input_tensors(input_tensors, dcoord_, ntypes, datype_, dbox, ilist, jrange, jlist, array_int, array_longlong, array_double, fparam, aparam, nnpmap, nghost);
+  int ret = session_input_tensors(input_tensors, dcoord_, ntypes, datype_, dbox, ilist, jrange, jlist, fparam, aparam, nnpmap, nghost);
 #else
   int ret = session_input_tensors(input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam, nnpmap, nghost);
 #endif
@@ -1094,7 +1000,7 @@ compute (vector<ENERGYTYPE> & all_energy,
 
   }
 #ifdef USE_CUDA_TOOLKIT
-  int ret = session_input_tensors(input_tensors, dcoord_, ntypes, datype_, dbox, ilist, jrange, jlist, array_int, array_longlong, array_double, fparam, aparam, nnpmap, nghost);
+  int ret = session_input_tensors(input_tensors, dcoord_, ntypes, datype_, dbox, ilist, jrange, jlist, fparam, aparam, nnpmap, nghost);
 #else
   int ret = session_input_tensors(input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam, nnpmap, nghost);
 #endif
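
The neighbor-list buffers that survive this change are handled with a simple grow-on-demand pattern: allocate ilist, jrange and jlist on first use, reallocate a buffer only when the incoming host neighbor list no longer fits, then copy host to device. Below is a minimal standalone sketch of that pattern, not the actual NNPInter::update_nbor member; the helper name sync_int_buffer and its free-function form are hypothetical, and cudaErrcheck/cudaAssert are repeated only so the sketch compiles on its own.

// Minimal sketch (assumed helper, not DeePMD-kit API) of the grow-on-demand
// device buffer handling that update_nbor performs for each integer list.
#include <cuda_runtime.h>
#include <cstdio>
#include <cstdlib>
#include <vector>

#define cudaErrcheck(res) { cudaAssert((res), __FILE__, __LINE__); }
inline void cudaAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
  if (code != cudaSuccess)
  {
    fprintf(stderr, "cuda assert: %s %s %d\n", cudaGetErrorString(code), file, line);
    if (abort) exit(code);
  }
}

// Hypothetical free function mirroring what update_nbor does for ilist,
// jrange and jlist: allocate on first use, regrow only when the host list
// no longer fits, then copy host -> device.
static void sync_int_buffer(int *&dev, size_t &dev_size, const std::vector<int> &host)
{
  if (dev == NULL) {                      // first call: allocate
    cudaErrcheck(cudaMalloc((void**)&dev, sizeof(int) * host.size()));
    dev_size = host.size();
  }
  else if (dev_size < host.size()) {      // buffer too small: free and regrow
    cudaErrcheck(cudaFree(dev));
    cudaErrcheck(cudaMalloc((void**)&dev, sizeof(int) * host.size()));
    dev_size = host.size();
  }
  cudaErrcheck(cudaMemcpy(dev, &host[0], sizeof(int) * host.size(),
                          cudaMemcpyHostToDevice));
}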