Minor changes to compile with OpenACC and latest cuda + pgi compiler: (#107)

pramodk · web-flow · commit 9b88683f946f · 2018-07-31T11:13:28.000+02:00
* Minor changes to compile with OpenACC and latest cuda + pgi compiler:
    - acc routine seq in header causes pgi compiler internal errors
    - sm_20 is outdated, keep minimum sm_30 and add sm_60
* Update README and fix issue with disabling cuda modules
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -236,6 +236,7 @@ if(ENABLE_OPENACC)
         endif(CUDA_FOUND)
     else(ENABLE_CUDA_MODULES)
         message(INFO "Support for CUDA modules (e.g. Random123) is disabled!")
+        add_definitions(-DCUDA_MODULES_DISABLED)
     endif(ENABLE_CUDA_MODULES)
 
 ELSE(ENABLE_OPENACC)
diff --git a/README.md b/README.md
@@ -62,19 +62,25 @@ CoreNEURON has support for GPUs using the OpenACC programming model when enabled
 
 ```bash
 module purge
-module load pgi/pgi64/16.5 pgi/mpich/16.5   #change pgi and cuda modules
-module load cuda/6.0
+module purge all
+module load pgi/18.4 cuda/9.0.176 cmake/3.5  #change pgi and cuda modules
 
 export CC=mpicc
 export CXX=mpicxx
 
 cmake .. -DADDITIONAL_MECHPATH="/path/of/folder/with/mod_files" -DCMAKE_C_FLAGS:STRING="-O2" -DCMAKE_CXX_FLAGS:STRING="-O2" -DCOMPILE_LIBRARY_TYPE=STATIC -DCMAKE_INSTALL_PREFIX=$EXPER_DIR/install/ -DCUDA_HOST_COMPILER=`which gcc` -DCUDA_PROPAGATE_HOST_FLAGS=OFF -DENABLE_SELECTIVE_GPU_PROFILING=ON -DENABLE_OPENACC=ON
 ```
 
-Note that the CUDA Toolkit version should be compatible with PGI compiler installed on your system. Otherwise you have to add extra C/C++ flags. For example, if we are using CUDA Toolkit 7.5 installation but PGI default target is CUDA 7.0 then we have to add :
+Note that the CUDA Toolkit version should be compatible with the PGI compiler installed on your system. Otherwise, you have to add extra C/C++ flags. For example, if you are using a CUDA Toolkit 9.0 installation but the PGI default target is CUDA 8.0, then you have to add:
 
 ```bash
--DCMAKE_C_FLAGS:STRING="-O2 -ta=tesla:cuda7.5" -DCMAKE_CXX_FLAGS:STRING="-O2 -ta=tesla:cuda7.5"
+-DCMAKE_C_FLAGS:STRING="-O2 -ta=tesla:cuda9.0" -DCMAKE_CXX_FLAGS:STRING="-O2 -ta=tesla:cuda9.0"
+```
+
+If a MOD file contains large functions or procedures that the compiler does not inline, you can pass additional C/C++ compiler flags:
+
+```bash
+-Minline=size:1000,levels:100,totalsize:40000,maxsize:4000
 ```
 
 CoreNEURON uses the Random123 library written in CUDA. If you are **not using `NrnRandom123`** in your model and have issues with CUDA compilation/linking (or CUDA Toolkit is not installed), you can disable the CUDA dependency using the CMake option `-DENABLE_CUDA_MODULES=OFF` :
diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt
@@ -272,7 +272,9 @@ set(link_reportinglib)
 
 #@TODO: CMake should have option for arch
 if(ENABLE_OPENACC AND ENABLE_CUDA_MODULES)
-    cuda_add_library("cudacoreneuron" ${coreneuron_cuda_files} OPTIONS -arch=sm_20)
+    cuda_add_library("cudacoreneuron" ${coreneuron_cuda_files} OPTIONS
+        -gencode=arch=compute_30,code=sm_30
+        -gencode=arch=compute_60,code=sm_60)
     set(link_cudacoreneuron cudacoreneuron)
 endif()
 
diff --git a/coreneuron/mech/mod2c_core_thread.h b/coreneuron/mech/mod2c_core_thread.h
@@ -114,7 +114,6 @@ extern int nrn_kinetic_steer(int, SparseObj*, double*, _threadargsproto_);
 
 // derived from nrn/src/scopmath/euler.c
 // updated for aos/soa layout index
-#pragma acc routine seq
 static inline int euler_thread(int neqn, int* var, int* der, DIFUN fun, _threadargsproto_) {
     double dt = _nt->_dt;
     int i;
diff --git a/coreneuron/utils/randoms/nrnran123.cpp b/coreneuron/utils/randoms/nrnran123.cpp
@@ -48,7 +48,7 @@ size_t nrnran123_state_size() {
 
 void nrnran123_set_globalindex(uint32_t gix) {
     k.v[0] = gix;
-#if (defined(__CUDACC__) || defined(_OPENACC))
+#if (defined(__CUDACC__) || defined(_OPENACC)) && !defined(CUDA_MODULES_DISABLED)
     nrnran123_set_gpu_globalindex(gix);
 #endif
 }

Original file line number	Diff line number	Diff line change
`@@ -48,7 +48,7 @@ size_t nrnran123_state_size() {`
`48`	`48`
`49`	`49`	`void nrnran123_set_globalindex(uint32_t gix) {`
`50`	`50`	`k.v[0] = gix;`
`51`		`-#if (defined(__CUDACC__) \|\| defined(_OPENACC))`
	`51`	`+#if (defined(__CUDACC__) \|\| defined(_OPENACC)) && !defined(CUDA_MODULES_DISABLED)`
`52`	`52`	`nrnran123_set_gpu_globalindex(gix);`
`53`	`53`	`#endif`
`54`	`54`	`}`