doc: add the example of expanse (#188)

njzjz · web-flow · commit 5ec9b3902952 · 2022-05-12T23:05:48.000-04:00
* add more examples

* write doc

* fix grammar
diff --git a/doc/conf.py b/doc/conf.py
@@ -31,7 +31,7 @@
 # ones.
 extensions = [
     'deepmodeling_sphinx',
-    'recommonmark',
+    'myst_parser',
     "sphinx_rtd_theme",
     'sphinx.ext.viewcode',
     'sphinx.ext.intersphinx',
diff --git a/doc/examples/expanse.md b/doc/examples/expanse.md
@@ -0,0 +1,24 @@
+# Running the DeePMD-kit on the Expanse cluster
+
+[Expanse](https://www.sdsc.edu/support/user_guides/expanse.html) is a cluster operated by the San Diego Supercomputer Center. Here we provide an example of running jobs on Expanse.
+
+The machine parameters are provided below. Expanse uses the SLURM workload manager for job scheduling. `remote_root` has been created in advance. It's worth mentioning that we do not recommend using a password, so [SSH keys](https://www.ssh.com/academy/ssh/key) are used instead to improve security.
+
+```{literalinclude} ../../examples/machine/expanse.json
+:language: json
+:linenos:
+```
+
+Expanse's standard compute nodes are each powered by two 64-core AMD EPYC 7742 processors and contain 256 GB of DDR4 memory. Here, we request one node with 32 cores and 16 GB memory from the `shared` partition. Expanse does not support the `--gres=gpu:0` option, so we use `custom_gpu_line` to customize the statement.
+
+```{literalinclude} ../../examples/resources/expanse_cpu.json
+:language: json
+:linenos:
+```
+
+The following task parameters run a DeePMD-kit task, forwarding an input file and backwarding graph files. Here, the data set is shared among all the tasks, so it is not included in the `forward_files`. Instead, it should be included in the submission's `forward_common_files`.
+
+```{literalinclude} ../../examples/task/deepmd-kit.json
+:language: json
+:linenos:
+```
diff --git a/doc/index.rst b/doc/index.rst
@@ -22,6 +22,11 @@ DPDispatcher will monitor (poke) until these jobs finish and download the result
    task
    api/api
 
+.. toctree::
+   :caption: Examples
+   :glob:
+
+   examples/expanse
 
 Indices and tables
 ==================
diff --git a/examples/machine/expanse.json b/examples/machine/expanse.json
@@ -0,0 +1,12 @@
+{
+  "batch_type": "Slurm",
+  "local_root": "./",
+  "remote_root": "/expanse/lustre/scratch/njzjz/temp_project/dpgen_workdir",
+  "clean_asynchronously": true,
+  "context_type": "SSHContext",
+  "remote_profile": {
+    "hostname": "login.expanse.sdsc.edu",
+    "username": "njzjz",
+    "port": "22"
+  }
+}
diff --git a/examples/machine/lazy_local.json b/examples/machine/lazy_local.json
@@ -0,0 +1,5 @@
+{
+  "batch_type": "Shell",
+  "local_root": "./",
+  "context_type": "LazyLocalContext"
+}
diff --git a/examples/resources/expanse_cpu.json b/examples/resources/expanse_cpu.json
@@ -0,0 +1,27 @@
+{
+  "number_node": "1",
+  "cpu_per_node": "1",
+  "gpu_per_node": "0",
+  "queue_name": "shared",
+  "group_size": "1",
+  "custom_flags": [
+    "#SBATCH -c 32",
+    "#SBATCH --mem=16G",
+    "#SBATCH --time=48:00:00",
+    "#SBATCH --account=rut149",
+    "#SBATCH --requeue"
+  ],
+  "source_list": [
+    "activate /home/njzjz/deepmd-kit"
+  ],
+  "envs": {
+    "OMP_NUM_THREADS": 4,
+    "TF_INTRA_OP_PARALLELISM_THREADS": 4,
+    "TF_INTER_OP_PARALLELISM_THREADS": 8,
+    "DP_AUTO_PARALLELIZATION": 1
+  },
+  "batch_type": "Slurm",
+  "kwargs": {
+    "custom_gpu_line": "#SBATCH --gpus=0"
+  }
+}
diff --git a/examples/task/deepmd-kit.json b/examples/task/deepmd-kit.json
@@ -0,0 +1,13 @@
+{
+    "command": "dp train input.json && dp freeze && dp compress",
+    "task_work_path": "model1/",
+    "forward_files": [
+      "input.json"
+    ],
+    "backward_files": [
+      "frozen_model.pb",
+      "frozen_model_compressed.pb"
+    ],
+    "outlog": "log",
+    "errlog": "err"
+}
diff --git a/setup.py b/setup.py
@@ -40,7 +40,7 @@
     keywords='deep potential generator active learning deepmd-kit',
     install_requires=install_requires,    
     extras_require={
-        'docs': ['sphinx', 'recommonmark', 'sphinx_rtd_theme>=1.0.0rc1', 'numpydoc', 'deepmodeling_sphinx'],
+        'docs': ['sphinx', 'myst-parser', 'sphinx_rtd_theme>=1.0.0rc1', 'numpydoc', 'deepmodeling_sphinx'],
         "cloudserver": ["oss2", "tqdm"],
         ":python_version<'3.7'": ["typing_extensions"],
     },