Commit 4adb831

doc: add the example of shell (#190)
1 parent 5ec9b39 commit 4adb831

File tree

6 files changed, +54 -6 lines


doc/examples/shell.md

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
+## Running multiple MD tasks on a GPU workstation
+
+In this example, we show how to run multiple MD tasks on a GPU workstation. The workstation does not have any job scheduling system installed, so we use `Shell` as the `batch_type`.
+
+```{literalinclude} ../../examples/machine/mandu.json
+:language: json
+:linenos:
+```
+
+The workstation has 48 CPU cores and 8 RTX 3090 cards. We want each card to run 6 tasks at the same time, since a single task does not consume many GPU resources. Thus, `strategy/if_cuda_multi_devices` is set to `true` and `para_deg` is set to 6.
+
+```{literalinclude} ../../examples/resources/mandu.json
+:language: json
+:linenos:
+```
+
+Note that `group_size` should be set large enough that all tasks are packed into a single job, so that multiple jobs do not run at the same time.
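
As a rough illustration of the packing described in the documentation above (not DPDispatcher's actual scheduling code), the sketch below shows how 8 cards with 6 tasks each add up to 48 concurrent tasks, one per CPU core, and how a simple round-robin mapping of tasks onto CUDA devices would look. Everything in it is illustrative.

```python
# Illustrative only: the counting behind examples/resources/mandu.json.
gpu_per_node = 8   # RTX 3090 cards in the workstation
para_deg = 6       # tasks intended to share each card
cpu_per_node = 48  # CPU cores available

max_concurrent_tasks = gpu_per_node * para_deg
assert max_concurrent_tasks == cpu_per_node  # 8 * 6 = 48 tasks, one core each

# A simple round-robin picture of how tasks could be spread over the cards
# (the actual CUDA_VISIBLE_DEVICES assignment is handled by DPDispatcher).
for task_id in range(max_concurrent_tasks):
    print(f"task {task_id:2d} -> CUDA device {task_id % gpu_per_node}")
```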

doc/index.rst

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@ DPDispatcher will monitor (poke) until these jobs finish and download the result
    :caption: Examples
    :glob:
 
-   examples/expanse
+   examples/*
 
 Indices and tables
 ==================

examples/machine/expanse.json

Lines changed: 1 addition & 1 deletion
@@ -7,6 +7,6 @@
     "remote_profile": {
         "hostname": "login.expanse.sdsc.edu",
         "username": "njzjz",
-        "port": "22"
+        "port": 22
     }
 }

examples/machine/mandu.json

Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
+{
+    "batch_type": "Shell",
+    "local_root": "./",
+    "remote_root": "/data2/jinzhe/dpgen_workdir",
+    "clean_asynchronously": true,
+    "context_type": "SSHContext",
+    "remote_profile": {
+        "hostname": "mandu.iqb.rutgers.edu",
+        "username": "jz748",
+        "port": 22
+    }
+}

examples/resources/expanse_cpu.json

Lines changed: 4 additions & 4 deletions
@@ -1,9 +1,9 @@
 {
-    "number_node": "1",
-    "cpu_per_node": "1",
-    "gpu_per_node": "0",
+    "number_node": 1,
+    "cpu_per_node": 1,
+    "gpu_per_node": 0,
     "queue_name": "shared",
-    "group_size": "1",
+    "group_size": 1,
     "custom_flags": [
         "#SBATCH -c 32",
         "#SBATCH --mem=16G",

examples/resources/mandu.json

Lines changed: 19 additions & 0 deletions
@@ -0,0 +1,19 @@
+{
+    "number_node": 1,
+    "cpu_per_node": 48,
+    "gpu_per_node": 8,
+    "queue_name": "shell",
+    "group_size": 9999,
+    "strategy": {
+        "if_cuda_multi_devices": true
+    },
+    "source_list": [
+        "activate /home/jz748/deepmd-kit"
+    ],
+    "envs": {
+        "OMP_NUM_THREADS": 1,
+        "TF_INTRA_OP_PARALLELISM_THREADS": 1,
+        "TF_INTER_OP_PARALLELISM_THREADS": 1
+    },
+    "para_deg": 6
+}
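
For completeness, here is a minimal sketch of how the two `mandu.json` files added in this commit could be driven from Python, assuming the `Machine.load_from_json`/`Resources.load_from_json` helpers and the `Task`/`Submission` classes of a recent DPDispatcher release; the MD command, work paths, and file names are hypothetical placeholders, not part of this commit.

```python
from dpdispatcher import Machine, Resources, Task, Submission

# Load the machine and resources definitions added in this commit.
machine = Machine.load_from_json("examples/machine/mandu.json")
resources = Resources.load_from_json("examples/resources/mandu.json")

# A hypothetical MD task; the command, work path, and file lists are placeholders.
task = Task(
    command="lmp -i input.lammps",
    task_work_path="task_000/",
    forward_files=["input.lammps", "conf.lmp"],
    backward_files=["log.lammps"],
)

# With a large group_size, all tasks are packed into one Shell job,
# which is submitted over SSH and monitored until it finishes.
submission = Submission(
    work_base="md_workdir/",
    machine=machine,
    resources=resources,
    task_list=[task],
)
submission.run_submission()
```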
