
Commit 6d5a1eb

Enhance document and scripts (#92)

1. fix instructions in readme
2. allow users to specify their own MLC_LLM_HOME path
1 parent 9d048b3 commit 6d5a1eb

File tree

8 files changed: +44, -66 lines


README.md

Lines changed: 10 additions & 12 deletions
````diff
@@ -55,25 +55,23 @@ Besides supporting WebGPU, this project also provides the harness for other kind
 Currently we support LLaMA and Vicuna.
 
 1. Get the original LLaMA weights in the huggingface format by following the instructions [here](https://huggingface.co/docs/transformers/main/model_doc/llama).
-2. Use instructions [here](https://github.com/lm-sys/FastChat#vicuna-weights) to get vicuna weights.
-3. Create a soft link to the model path under dist/models
+2. Use instructions [here](https://github.com/lm-sys/FastChat#vicuna-weights) to get vicuna weights.
+3. Create a soft link to the model path under mlc-llm/dist/models.
 ```shell
-mkdir -p dist/models
-ln -s your_model_path dist/models/model_name
+mkdir -p mlc-llm/dist/models
+ln -s your_model_path mlc-llm/dist/models/model_name
 
 # For example:
-# ln -s path/to/vicuna-7b-v1 dist/models/vicuna-7b-v1
+# ln -s path/to/vicuna-7b-v1 mlc-llm/dist/models/vicuna-7b-v1
 ```
-* Optimize and build model to webgpu backend and export the executable to disk in the WebAssembly file format.
-
 
+If you want to use your own mlc-llm branch, set `MLC_LLM_HOME` to that path and link weights under `$MLC_LLM_HOME/dist/models/model_name`.
+* Optimize and build the model for the webgpu backend and export the executable to disk in the WebAssembly file format.
 ```shell
-python3 build.py --target webgpu
-```
-By default `build.py` takes `vicuna-7b-v1` as model name. You can also specify model name as
-```shell
-python3 build.py --target webgpu --model llama-7b
+./build.sh --quantization q4f32_0
 ```
+By default `build.sh` takes `vicuna-7b-v1` as the model name.
+
 Note: build.py can be run on MacOS with 32GB memory and other OS with at least 50GB CPU memory. We are currently optimizing the memory usage to enable more people to try out locally.
 
 4. Deploy the model on web with WebGPU runtime
````
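Taken together, the updated instructions amount to the following end-to-end sketch; the checkout and weight paths below are illustrative, not from the commit:

```shell
# Optional: point the scripts at your own mlc-llm checkout instead of the
# submodule (path is illustrative).
export MLC_LLM_HOME=$HOME/src/my-mlc-llm

# Link converted weights where the build expects them (path is illustrative).
mkdir -p $MLC_LLM_HOME/dist/models
ln -s $HOME/models/vicuna-7b-v1 $MLC_LLM_HOME/dist/models/vicuna-7b-v1

# Optimize and build for the webgpu backend (vicuna-7b-v1 is the default model).
./build.sh --quantization q4f32_0
```

If `MLC_LLM_HOME` is left unset, the scripts fall back to the `mlc-llm` submodule directory.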

build.sh

Lines changed: 10 additions & 0 deletions
```diff
@@ -0,0 +1,10 @@
+MLC_LLM_HOME_SET="${MLC_LLM_HOME:-}"
+
+if [ -z ${MLC_LLM_HOME_SET} ]; then
+    export MLC_LLM_HOME="${MLC_LLM_HOME:-mlc-llm}"
+fi
+
+
+cd ${MLC_LLM_HOME}
+python build.py --target webgpu ${@}
+cd -
```
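The wrapper forwards any extra arguments to `build.py` through `${@}`. A hedged usage sketch; the override path is illustrative:

```shell
# Default layout: runs build.py inside the mlc-llm submodule.
./build.sh --quantization q4f32_0

# One-off override against a different mlc-llm checkout (path is illustrative).
MLC_LLM_HOME=$HOME/src/my-mlc-llm ./build.sh --quantization q4f32_0
```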

mlc-llm

Submodule mlc-llm updated 1 file

scripts/build_site.sh

Lines changed: 9 additions & 9 deletions
```diff
@@ -15,19 +15,19 @@ cp web/llm_chat.html site/_includes
 cp web/llm_chat.js site/dist/
 cp web/llm_chat.css site/dist/
 
-cp mlc-llm/dist/tvmjs_runtime.wasi.js site/dist
-cp mlc-llm/dist/tvmjs.bundle.js site/dist
-cp -r mlc-llm/dist/sentencepiece site/dist
+cp $MLC_LLM_HOME/dist/tvmjs_runtime.wasi.js site/dist
+cp $MLC_LLM_HOME/dist/tvmjs.bundle.js site/dist
+cp -r $MLC_LLM_HOME/dist/sentencepiece site/dist
 
-if [ -d "mlc-llm/dist/vicuna-v1-7b-q4f32_0/params" ]; then
+if [ -d "$MLC_LLM_HOME/dist/vicuna-v1-7b-q4f32_0/params" ]; then
   mkdir -p site/dist/vicuna-v1-7b-q4f32_0
-  cp -rf mlc-llm/dist/vicuna-v1-7b-q4f32_0/tokenizer.model site/dist/vicuna-v1-7b-q4f32_0/
-  cp -rf mlc-llm/dist/vicuna-v1-7b-q4f32_0/vicuna-v1-7b-q4f32_0-webgpu.wasm site/dist/vicuna-v1-7b-q4f32_0/
+  cp -rf $MLC_LLM_HOME/dist/vicuna-v1-7b-q4f32_0/tokenizer.model site/dist/vicuna-v1-7b-q4f32_0/
+  cp -rf $MLC_LLM_HOME/dist/vicuna-v1-7b-q4f32_0/vicuna-v1-7b-q4f32_0-webgpu.wasm site/dist/vicuna-v1-7b-q4f32_0/
 fi
-if [ -d "mlc-llm/dist/wizardlm-7b/params" ]; then
+if [ -d "$MLC_LLM_HOME/dist/wizardlm-7b/params" ]; then
   mkdir -p site/dist/wizardlm-7b
-  cp -rf mlc-llm/dist/wizardlm-7b/tokenizer.model site/dist/wizardlm-7b/
-  cp -rf mlc-llm/dist/wizardlm-7b/wizardlm-7b-webgpu.wasm site/dist/wizardlm-7b/
+  cp -rf $MLC_LLM_HOME/dist/wizardlm-7b/tokenizer.model site/dist/wizardlm-7b/
+  cp -rf $MLC_LLM_HOME/dist/wizardlm-7b/wizardlm-7b-webgpu.wasm site/dist/wizardlm-7b/
 fi
 
 cd site && jekyll b && cd ..
```
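Note that `build_site.sh` reads `$MLC_LLM_HOME` but, unlike `local_deploy_site.sh`, does not set a default itself. A minimal sketch of a direct invocation, assuming the default submodule layout:

```shell
# Export MLC_LLM_HOME before calling build_site.sh directly;
# local_deploy_site.sh sets this default for you.
export MLC_LLM_HOME=mlc-llm
scripts/build_site.sh web/global_config.json
```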

scripts/local_deploy_site.sh

Lines changed: 10 additions & 4 deletions
```diff
@@ -1,18 +1,24 @@
 #!/bin/bash
 set -euxo pipefail
 
+MLC_LLM_HOME_SET="${MLC_LLM_HOME:-}"
+
+if [ -z ${MLC_LLM_HOME_SET} ]; then
+    export MLC_LLM_HOME="${MLC_LLM_HOME:-mlc-llm}"
+fi
+
 scripts/build_site.sh web/global_config.json
 
 echo "symlink parameter location to site.."
 
-if [ -d "mlc-llm/dist/vicuna-v1-7b-q4f32_0/params" ]; then
+if [ -d "$MLC_LLM_HOME/dist/vicuna-v1-7b-q4f32_0/params" ]; then
   rm -rf site/_site/dist/vicuna-v1-7b-q4f32_0-params
-  ln -s `pwd`/mlc-llm/dist/vicuna-v1-7b-q4f32_0/params site/_site/dist/vicuna-v1-7b-q4f32_0/params
+  ln -s $MLC_LLM_HOME/dist/vicuna-v1-7b-q4f32_0/params site/_site/dist/vicuna-v1-7b-q4f32_0/params
   ls site/_site/dist/vicuna-v1-7b-q4f32_0
 fi
-if [ -d "mlc-llm/dist/wizardlm-7b/params" ]; then
+if [ -d "$MLC_LLM_HOME/dist/wizardlm-7b/params" ]; then
   rm -rf site/_site/dist/wizardlm-7b-params
-  ln -s `pwd`/mlc-llm/dist/wizardlm-7b/params site/_site/dist/wizardlm-7b-params
+  ln -s $MLC_LLM_HOME/dist/wizardlm-7b/params site/_site/dist/wizardlm-7b-params
 fi
 
```
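Since the script now symlinks params from `$MLC_LLM_HOME` rather than from `` `pwd` ``, an absolute override path keeps the links under `site/_site` resolvable. A hedged sketch; the path is illustrative:

```shell
# Prefer an absolute path: the symlink targets are taken verbatim from
# MLC_LLM_HOME, and relative targets resolve from the link's own directory.
export MLC_LLM_HOME=$HOME/src/my-mlc-llm
scripts/local_deploy_site.sh
```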

scripts/prep_deps.sh

Lines changed: 3 additions & 3 deletions
```diff
@@ -22,8 +22,8 @@ cd ${TVM_HOME}/web && make && npm install && npm run bundle && cd -
 git submodule update --init --recursive
 cd ${SENTENCEPIECE_JS_HOME} && npm install && npm run build && cd -
 git submodule update --init --recursive
-rm -rf mlc-llm/dist/sentencepiece
-cp -r ${SENTENCEPIECE_JS_HOME}/dist mlc-llm/dist/sentencepiece
+rm -rf dist/sentencepiece
+cp -r ${SENTENCEPIECE_JS_HOME}/dist dist/sentencepiece
 
 echo "Exporting tvmjs runtime dist files"
-python3 -c "from tvm.contrib import tvmjs; tvmjs.export_runtime(\"mlc-llm/dist\")"
+python -c "from tvm.contrib import tvmjs; tvmjs.export_runtime(\"dist\")"
```
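To sanity-check the relocated outputs, the export step can be re-run on its own, assuming TVM is importable in the active Python environment:

```shell
# Re-run just the tvmjs runtime export and inspect what landed in dist/.
python -c "from tvm.contrib import tvmjs; tvmjs.export_runtime(\"dist\")"
ls dist
```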

scripts/rpc_debug_deploy.sh

Lines changed: 0 additions & 36 deletions
This file was deleted.

web/llm_chat.html

Lines changed: 1 addition & 1 deletion
```diff
@@ -25,7 +25,7 @@
 <form>
   <select id="model-name">
     <option selected="selected">vicuna-v1-7b-q4f32_0</option>
-    <option >dolly-v1-3b-q4f32_0</option>
+    <!-- <option >dolly-v1-3b-q4f32_0</option> -->
   </select>
 </form>
```
