 #include "SimulationRegion.h"
 #include <stdexcept>
 
-#define MAGIC_NUMBER 1024
 
 #ifdef USE_CUDA_TOOLKIT
 #include "cuda_runtime.h"
 #define cudaErrcheck(res) { cudaAssert((res), __FILE__, __LINE__); }
 inline void cudaAssert(cudaError_t code, const char *file, int line, bool abort=true)
 {
-    if (code != cudaSuccess)
+    if (code != cudaSuccess)
     {
         fprintf(stderr, "cuda assert: %s %s %d\n", cudaGetErrorString(code), file, line);
         if (abort) exit(code);
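Note: the cudaErrcheck/cudaAssert helper above is the usual checked-CUDA-call pattern: wrap every runtime call, print the failing file and line, and abort on error. A minimal, self-contained sketch of the same pattern follows; the buffer name d_buf and the size n are hypothetical and only illustrate how the wrapper is meant to be used.

// Sketch only: checked CUDA runtime calls via the cudaErrcheck wrapper.
// d_buf and n are made-up names for illustration.
#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>

#define cudaErrcheck(res) { cudaAssert((res), __FILE__, __LINE__); }
inline void cudaAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    if (code != cudaSuccess)
    {
        fprintf(stderr, "cuda assert: %s %s %d\n", cudaGetErrorString(code), file, line);
        if (abort) exit(code);
    }
}

int main()
{
    int *d_buf = NULL;
    const int n = 1024;
    cudaErrcheck(cudaMalloc((void **)&d_buf, sizeof(int) * n));  // aborts with file/line on failure
    cudaErrcheck(cudaFree(d_buf));
    return 0;
}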
@@ -273,33 +272,19 @@ NNPInter::~NNPInter() {
         cudaErrcheck(cudaFree(ilist));
         cudaErrcheck(cudaFree(jrange));
         cudaErrcheck(cudaFree(jlist));
-        cudaErrcheck(cudaFree(array_int));
-        cudaErrcheck(cudaFree(array_longlong));
-        cudaErrcheck(cudaFree(array_double));
     }
     #endif
 }
 
 #ifdef USE_CUDA_TOOLKIT
 void NNPInter::update_nbor(const InternalNeighborList & nlist, const int nloc) {
     if (!init_nbor) {
-        sec_a = cum_sum(get_sel_a());
         cudaErrcheck(cudaMalloc((void **)&ilist, sizeof(int) * nlist.ilist.size()));
         cudaErrcheck(cudaMalloc((void **)&jrange, sizeof(int) * nlist.jrange.size()));
         cudaErrcheck(cudaMalloc((void **)&jlist, sizeof(int) * nlist.jlist.size()));
-        cudaErrcheck(cudaMalloc((void **)&array_int, sizeof(int) * (sec_a.size() + nloc * sec_a.size() + nloc)));
-        cudaErrcheck(cudaMalloc((void **)&array_longlong, sizeof(unsigned long long) * nloc * MAGIC_NUMBER * 2));
-        #ifdef HIGH_PREC
-        cudaErrcheck(cudaMalloc((void **)&array_double, sizeof(compute_t) * nloc * sec_a.back() * 3));
-        #else
-        cudaErrcheck(cudaMalloc((void **)&array_double, sizeof(compute_t) * nloc * sec_a.back() * 3));
-        #endif
         ilist_size = nlist.ilist.size();
         jrange_size = nlist.jrange.size();
         jlist_size = nlist.jlist.size();
-        arr_int_size = sec_a.size() + nloc * sec_a.size() + nloc;
-        arr_ll_size = nloc * MAGIC_NUMBER * 2;
-        arr_dou_size = nloc * sec_a.back() * 3;
         init_nbor = true;
     }
     if (ilist_size < nlist.ilist.size()) {
@@ -317,25 +302,7 @@ void NNPInter::update_nbor(const InternalNeighborList & nlist, const int nloc) {
         cudaErrcheck(cudaMalloc((void **)&jlist, sizeof(int) * nlist.jlist.size()));
         jlist_size = nlist.jlist.size();
     }
-    if (arr_int_size < sec_a.size() + nloc * sec_a.size() + nloc) {
-        cudaErrcheck(cudaFree(array_int));
-        cudaErrcheck(cudaMalloc((void **)&array_int, sizeof(int) * (sec_a.size() + nloc * sec_a.size() + nloc)));
-        arr_int_size = sec_a.size() + nloc * sec_a.size() + nloc;
-    }
-    if (arr_ll_size < nloc * MAGIC_NUMBER * 2) {
-        cudaErrcheck(cudaFree(array_longlong));
-        cudaErrcheck(cudaMalloc((void **)&array_longlong, sizeof(unsigned long long) * nloc * MAGIC_NUMBER * 2));
-        arr_ll_size = nloc * MAGIC_NUMBER * 2;
-    }
-    if (arr_dou_size < nloc * sec_a.back() * 3) {
-        cudaErrcheck(cudaFree(array_double));
-        #ifdef HIGH_PREC
-        cudaErrcheck(cudaMalloc((void **)&array_double, sizeof(compute_t) * nloc * sec_a.back() * 3));
-        #else
-        cudaErrcheck(cudaMalloc((void **)&array_double, sizeof(compute_t) * nloc * sec_a.back() * 3));
-        #endif
-        arr_dou_size = nloc * sec_a.back() * 3;
-    }
+
     cudaErrcheck(cudaMemcpy(ilist, &nlist.ilist[0], sizeof(int) * nlist.ilist.size(), cudaMemcpyHostToDevice));
     cudaErrcheck(cudaMemcpy(jrange, &nlist.jrange[0], sizeof(int) * nlist.jrange.size(), cudaMemcpyHostToDevice));
     cudaErrcheck(cudaMemcpy(jlist, &nlist.jlist[0], sizeof(int) * nlist.jlist.size(), cudaMemcpyHostToDevice));
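Note: the retained part of update_nbor keeps a grow-only policy for the device neighbor-list buffers: allocate on first use, free and re-allocate only when the incoming list outgrows the recorded capacity, then copy the host data over. A self-contained sketch of that policy is below; the names d_ilist, ilist_capacity, and upload_ilist are hypothetical and only mirror the handling of ilist/ilist_size shown above (cudaErrcheck as defined earlier).

// Sketch: grow-only device buffer with host-to-device upload.
// d_ilist / ilist_capacity / upload_ilist are illustrative names, not part of this file.
#include <vector>
#include <cuda_runtime.h>

static int *d_ilist = NULL;
static size_t ilist_capacity = 0;

void upload_ilist(const std::vector<int> & ilist)
{
    if (d_ilist == NULL) {                        // first call: allocate
        cudaErrcheck(cudaMalloc((void **)&d_ilist, sizeof(int) * ilist.size()));
        ilist_capacity = ilist.size();
    }
    if (ilist_capacity < ilist.size()) {          // list grew: free and re-allocate
        cudaErrcheck(cudaFree(d_ilist));
        cudaErrcheck(cudaMalloc((void **)&d_ilist, sizeof(int) * ilist.size()));
        ilist_capacity = ilist.size();
    }
    // copy the current neighbor list into the device buffer
    cudaErrcheck(cudaMemcpy(d_ilist, &ilist[0], sizeof(int) * ilist.size(), cudaMemcpyHostToDevice));
}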
@@ -378,14 +345,10 @@ init (const string & model, const int & gpu_rank)
     if (dfparam < 0) dfparam = 0;
     if (daparam < 0) daparam = 0;
     inited = true;
-
+
     init_nbor = false;
-    array_int = NULL;
-    array_double = NULL;
-    array_longlong = NULL;
     ilist = NULL; jrange = NULL; jlist = NULL;
     ilist_size = 0; jrange_size = 0; jlist_size = 0;
-    arr_int_size = 0; arr_ll_size = 0; arr_dou_size = 0;
 }
 #else
 void
@@ -415,12 +378,8 @@ init (const string & model, const int & gpu_rank)
     inited = true;
 
     init_nbor = false;
-    array_int = NULL;
-    array_double = NULL;
-    array_longlong = NULL;
     ilist = NULL; jrange = NULL; jlist = NULL;
     ilist_size = 0; jrange_size = 0; jlist_size = 0;
-    arr_int_size = 0; arr_ll_size = 0; arr_dou_size = 0;
 }
 #endif
 
@@ -602,7 +561,7 @@ compute_inner (ENERGYTYPE & dener,
     }
 
     #ifdef USE_CUDA_TOOLKIT
-    int ret = session_input_tensors(input_tensors, dcoord_, ntypes, datype_, dbox, ilist, jrange, jlist, array_int, array_longlong, array_double, fparam, aparam, nnpmap, nghost);
+    int ret = session_input_tensors(input_tensors, dcoord_, ntypes, datype_, dbox, ilist, jrange, jlist, fparam, aparam, nnpmap, nghost);
     #else
     int ret = session_input_tensors(input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam, nnpmap, nghost);
     #endif
@@ -669,7 +628,7 @@ compute (ENERGYTYPE & dener,
     }
 
     #ifdef USE_CUDA_TOOLKIT
-    int ret = session_input_tensors(input_tensors, dcoord_, ntypes, datype_, dbox, ilist, jrange, jlist, array_int, array_longlong, array_double, fparam, aparam, nnpmap, nghost);
+    int ret = session_input_tensors(input_tensors, dcoord_, ntypes, datype_, dbox, ilist, jrange, jlist, fparam, aparam, nnpmap, nghost);
     #else
     int ret = session_input_tensors(input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam, nnpmap, nghost);
     #endif
@@ -710,9 +669,6 @@ NNPInterModelDevi::~NNPInterModelDevi() {
         cudaErrcheck(cudaFree(ilist));
         cudaErrcheck(cudaFree(jrange));
         cudaErrcheck(cudaFree(jlist));
-        cudaErrcheck(cudaFree(array_int));
-        cudaErrcheck(cudaFree(array_longlong));
-        cudaErrcheck(cudaFree(array_double));
     }
 #endif
 }
@@ -761,14 +717,10 @@ init (const vector<string> & models, const int & gpu_rank)
     // cell_size = rcut;
     // ntypes = get_ntypes();
     inited = true;
-
+
     init_nbor = false;
-    array_int = NULL;
-    array_double = NULL;
-    array_longlong = NULL;
     ilist = NULL; jrange = NULL; jlist = NULL;
     ilist_size = 0; jrange_size = 0; jlist_size = 0;
-    arr_int_size = 0; arr_ll_size = 0; arr_dou_size = 0;
 }
 #else
 void
@@ -798,14 +750,10 @@ init (const vector<string> & models, const int & gpu_rank)
     // cell_size = rcut;
     // ntypes = get_ntypes();
     inited = true;
-
+
     init_nbor = false;
-    array_int = NULL;
-    array_double = NULL;
-    array_longlong = NULL;
     ilist = NULL; jrange = NULL; jlist = NULL;
     ilist_size = 0; jrange_size = 0; jlist_size = 0;
-    arr_int_size = 0; arr_ll_size = 0; arr_dou_size = 0;
 }
 #endif
 
@@ -873,40 +821,18 @@ cum_sum (const std::vector<std::vector<int32> > n_sel)
     }
 }
 
-void
-NNPInterModelDevi::
-get_max_sec()
-{
-    for (int ii = 0; ii < numb_models; ii++) {
-        this->max_sec_size = max_sec_size < sec[ii].size() ? sec[ii].size() : max_sec_size;
-        this->max_sec_back = max_sec_back < sec[ii].back() ? sec[ii].back() : max_sec_back;
-    }
-}
-
 #ifdef USE_CUDA_TOOLKIT
 void
 NNPInterModelDevi::
 update_nbor(const InternalNeighborList & nlist, const int nloc)
 {
     if (!init_nbor) {
-        cum_sum(get_sel());
-        get_max_sec();
         cudaErrcheck(cudaMalloc((void **)&ilist, sizeof(int) * nlist.ilist.size()));
         cudaErrcheck(cudaMalloc((void **)&jrange, sizeof(int) * nlist.jrange.size()));
         cudaErrcheck(cudaMalloc((void **)&jlist, sizeof(int) * nlist.jlist.size()));
-        cudaErrcheck(cudaMalloc((void **)&array_int, sizeof(int) * (max_sec_size + nloc * max_sec_size + nloc)));
-        cudaErrcheck(cudaMalloc((void **)&array_longlong, sizeof(unsigned long long) * nloc * MAGIC_NUMBER * 2));
-        #ifdef HIGH_PREC
-        cudaErrcheck(cudaMalloc((void **)&array_double, sizeof(compute_t) * nloc * max_sec_back * 3));
-        #else
-        cudaErrcheck(cudaMalloc((void **)&array_double, sizeof(compute_t) * nloc * max_sec_back * 3));
-        #endif
         ilist_size = nlist.ilist.size();
         jrange_size = nlist.jrange.size();
         jlist_size = nlist.jlist.size();
-        arr_int_size = max_sec_size + nloc * max_sec_size + nloc;
-        arr_ll_size = nloc * MAGIC_NUMBER * 2;
-        arr_dou_size = nloc * max_sec_back * 3;
         init_nbor = true;
     }
     if (ilist_size < nlist.ilist.size()) {
@@ -924,25 +850,7 @@ update_nbor(const InternalNeighborList & nlist, const int nloc)
         cudaErrcheck(cudaMalloc((void **)&jlist, sizeof(int) * nlist.jlist.size()));
         jlist_size = nlist.jlist.size();
     }
-    if (arr_int_size < max_sec_size + nloc * max_sec_size + nloc) {
-        cudaErrcheck(cudaFree(array_int));
-        cudaErrcheck(cudaMalloc((void **)&array_int, sizeof(int) * (max_sec_size + nloc * max_sec_size + nloc)));
-        arr_int_size = max_sec_size + nloc * max_sec_size + nloc;
-    }
-    if (arr_ll_size < nloc * MAGIC_NUMBER * 2) {
-        cudaErrcheck(cudaFree(array_longlong));
-        cudaErrcheck(cudaMalloc((void **)&array_longlong, sizeof(unsigned long long) * nloc * MAGIC_NUMBER * 2));
-        arr_ll_size = nloc * MAGIC_NUMBER * 2;
-    }
-    if (arr_dou_size < nloc * max_sec_back * 3) {
-        cudaErrcheck(cudaFree(array_double));
-        #ifdef HIGH_PREC
-        cudaErrcheck(cudaMalloc((void **)&array_double, sizeof(compute_t) * nloc * max_sec_back * 3));
-        #else
-        cudaErrcheck(cudaMalloc((void **)&array_double, sizeof(compute_t) * nloc * max_sec_back * 3));
-        #endif
-        arr_dou_size = nloc * max_sec_back * 3;
-    }
+
     cudaErrcheck(cudaMemcpy(ilist, &nlist.ilist[0], sizeof(int) * nlist.ilist.size(), cudaMemcpyHostToDevice));
     cudaErrcheck(cudaMemcpy(jrange, &nlist.jrange[0], sizeof(int) * nlist.jrange.size(), cudaMemcpyHostToDevice));
     cudaErrcheck(cudaMemcpy(jlist, &nlist.jlist[0], sizeof(int) * nlist.jlist.size(), cudaMemcpyHostToDevice));
@@ -1044,7 +952,7 @@ compute (vector<ENERGYTYPE> & all_energy,
 
     }
     #ifdef USE_CUDA_TOOLKIT
-    int ret = session_input_tensors(input_tensors, dcoord_, ntypes, datype_, dbox, ilist, jrange, jlist, array_int, array_longlong, array_double, fparam, aparam, nnpmap, nghost);
+    int ret = session_input_tensors(input_tensors, dcoord_, ntypes, datype_, dbox, ilist, jrange, jlist, fparam, aparam, nnpmap, nghost);
     #else
     int ret = session_input_tensors(input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam, nnpmap, nghost);
     #endif
@@ -1094,7 +1002,7 @@ compute (vector<ENERGYTYPE> & all_energy,
 
     }
     #ifdef USE_CUDA_TOOLKIT
-    int ret = session_input_tensors(input_tensors, dcoord_, ntypes, datype_, dbox, ilist, jrange, jlist, array_int, array_longlong, array_double, fparam, aparam, nnpmap, nghost);
+    int ret = session_input_tensors(input_tensors, dcoord_, ntypes, datype_, dbox, ilist, jrange, jlist, fparam, aparam, nnpmap, nghost);
     #else
     int ret = session_input_tensors(input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam, nnpmap, nghost);
     #endif