Skip to content

Commit e990bab

Browse files
authored
Change vocab size to support vicuna v1 (#33)
Vicuna v0's vocab_size is 32001, but v1's vocab_size is 32000. So we need to update the manual schedule.
1 parent 0eff3fa commit e990bab

File tree

12 files changed

+125
-149
lines changed

12 files changed

+125
-149
lines changed

build.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
def _parse_args():
1919
args = argparse.ArgumentParser()
20-
args.add_argument("--model", type=str, default="vicuna-7b")
20+
args.add_argument("--model", type=str, default="vicuna-7b-v1")
2121
args.add_argument("--target", type=str, default="auto")
2222
args.add_argument("--db-path", type=str, default="log_db/")
2323
args.add_argument("--artifact-path", type=str, default="dist")

chat.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ def _parse_args():
1616
args.add_argument("--device-name", type=str, default="auto")
1717
args.add_argument("--debug-dump", action="store_true", default=False)
1818
args.add_argument("--artifact-path", type=str, default="dist")
19-
args.add_argument("--model", type=str, default="vicuna-7b")
19+
args.add_argument("--model", type=str, default="vicuna-7b-v1")
2020
args.add_argument("--max-gen-len", type=int, default=128)
2121
args.add_argument("--run-torch-model", action="store_true", default=False)
2222
parsed = args.parse_args()
@@ -101,7 +101,7 @@ def sample_top_p(probs, p):
101101
def chat(model_wrapper, args):
102102

103103
# Chat
104-
conv = conv_templates["v1"].copy()
104+
conv = conv_templates["vicuna_v1.1"].copy()
105105
while True:
106106
try:
107107
inp = input(f"{conv.roles[0]}: ")

evaluate.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def _parse_args():
2222
args.add_argument("--debug-dump", action="store_true", default=False)
2323
args.add_argument("--artifact-path", type=str, default="dist")
2424
args.add_argument("--prompt", type=str, default="The capital of Canada is")
25-
args.add_argument("--model", type=str, default="vicuna-7b")
25+
args.add_argument("--model", type=str, default="vicuna-7b-v1")
2626
args.add_argument("--profile", action="store_true", default=False)
2727
parsed = args.parse_args()
2828
parsed.model_path = os.path.join(parsed.artifact_path, "models", parsed.model)

scripts/build_site.sh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,10 @@ cp dist/tvmjs_runtime.wasi.js site/dist
1919
cp dist/tvmjs.bundle.js site/dist
2020
cp -r dist/sentencepiece site/dist
2121

22-
if [ -d "dist/vicuna-7b/params" ]; then
23-
mkdir -p site/dist/vicuna-7b
24-
cp -rf dist/models/vicuna-7b/tokenizer.model site/dist/vicuna-7b/
25-
cp -rf dist/vicuna-7b/vicuna-7b_webgpu.wasm site/dist/vicuna-7b/
22+
if [ -d "dist/vicuna-7b-v1/params" ]; then
23+
mkdir -p site/dist/vicuna-7b-v1
24+
cp -rf dist/models/vicuna-7b-v1/tokenizer.model site/dist/vicuna-7b-v1/
25+
cp -rf dist/vicuna-7b-v1/vicuna-7b-v1_webgpu.wasm site/dist/vicuna-7b-v1/
2626
fi
2727

2828
cd site && jekyll b && cd ..

scripts/local_deploy_site.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ scripts/build_site.sh web/local-config.json
55

66
echo "symlink parameter location to site.."
77

8-
if [ -d "dist/vicuna-7b/params" ]; then
9-
rm -rf site/_site/vicuna-7b-params
10-
ln -s `pwd`/dist/vicuna-7b/params site/_site/vicuna-7b-params
8+
if [ -d "dist/vicuna-7b-v1/params" ]; then
9+
rm -rf site/_site/vicuna-7b-v1-params
10+
ln -s `pwd`/dist/vicuna-7b-v1/params site/_site/vicuna-7b-v1-params
1111
fi
1212
cd site && jekyll serve --skip-initial-build --host localhost --baseurl /web-llm --port 8888

scripts/rpc_debug_deploy.sh

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,10 @@ cp web/local-config.json ${TVM_HOME}/web/dist/www/llm-chat-config.json
1919
rm -rf ${TVM_HOME}/web/dist/www/dist/sentencepiece
2020
cp -rf dist/sentencepiece ${TVM_HOME}/web/dist/www/dist/
2121

22-
if [ -d "dist/vicuna-7b/params" ]; then
23-
mkdir -p ${TVM_HOME}/web/dist/www/dist/vicuna-7b
24-
cp -rf dist/models/vicuna-7b/tokenizer.model ${TVM_HOME}/web/dist/www/dist/vicuna-7b/
25-
cp -rf dist/vicuna-7b/vicuna-7b_webgpu.wasm ${TVM_HOME}/web/dist/www/dist/vicuna-7b/
26-
rm -rf ${TVM_HOME}/web/.ndarray_cache/vicuna-7b-params
27-
ln -s `pwd`/dist/vicuna-7b/params ${TVM_HOME}/web/.ndarray_cache/vicuna-7b-params
22+
if [ -d "dist/vicuna-7b-v1/params" ]; then
23+
mkdir -p ${TVM_HOME}/web/dist/www/dist/vicuna-7b-v1
24+
cp -rf dist/models/vicuna-7b-v1/tokenizer.model ${TVM_HOME}/web/dist/www/dist/vicuna-7b-v1/
25+
cp -rf dist/vicuna-7b-v1/vicuna-7b-v1_webgpu.wasm ${TVM_HOME}/web/dist/www/dist/vicuna-7b-v1/
26+
rm -rf ${TVM_HOME}/web/.ndarray_cache/vicuna-7b-v1-params
27+
ln -s `pwd`/dist/vicuna-7b-v1/params ${TVM_HOME}/web/.ndarray_cache/vicuna-7b-v1-params
2828
fi

site/index.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ If you have a Mac computer with Apple silicon, here are the instructions for you
3737

3838
## Chat Demo
3939

40-
The chat demo is based on [vicuna-7b-v0](https://huggingface.co/lmsys/vicuna-7b-delta-v0) model. More model support are on the way.
40+
The chat demo is based on the [vicuna-7b-v1.1](https://huggingface.co/lmsys/vicuna-7b-delta-v1.1) model. More model support is on the way.
4141

4242
{% include llm_chat.html %}
4343

web/gh-page-config.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@
44
"shape": [32, 32, 128],
55
"dtype": "float32"
66
},
7-
"wasmUrl": "dist/vicuna-7b/vicuna-7b_webgpu.wasm",
7+
"wasmUrl": "dist/vicuna-7b-v1/vicuna-7b-v1_webgpu.wasm",
88
"cacheUrl": "https://huggingface.co/mlc-ai/web-lm/resolve/main/vicuna-0b/",
9-
"tokenizer": "dist/vicuna-7b/tokenizer.model",
9+
"tokenizer": "dist/vicuna-7b-v1/tokenizer.model",
1010
"maxGenLength": 512,
1111
"meanGenLength": 128,
1212
"maxWindowLength": 2048

web/llm_chat.js

Lines changed: 7 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -51,35 +51,13 @@ class Conversation {
5151

5252
function defaultConversation(maxWindowLength = 512) {
5353
return new Conversation({
54-
system: "A chat between a curious human and an artificial intelligence assistant. " +
55-
"The assistant gives helpful, detailed, and polite answers to the human's questions.",
56-
roles: ["Human", "Assistant"],
54+
system: "A chat between a curious user and an artificial intelligence assistant. " +
55+
"The assistant gives helpful, detailed, and polite answers to the user's questions.",
56+
roles: ["USER", "ASSISTANT"],
5757
maxWindowLength: maxWindowLength,
58-
messages: [
59-
["Human", "What are the key differences between renewable and non-renewable energy sources?"],
60-
[ "Assistant",
61-
"Renewable energy sources are those that can be replenished naturally in a relatively " +
62-
"short amount of time, such as solar, wind, hydro, geothermal, and biomass. " +
63-
"Non-renewable energy sources, on the other hand, are finite and will eventually be " +
64-
"depleted, such as coal, oil, and natural gas. Here are some key differences between " +
65-
"renewable and non-renewable energy sources:\n" +
66-
"1. Availability: Renewable energy sources are virtually inexhaustible, while non-renewable " +
67-
"energy sources are finite and will eventually run out.\n" +
68-
"2. Environmental impact: Renewable energy sources have a much lower environmental impact " +
69-
"than non-renewable sources, which can lead to air and water pollution, greenhouse gas emissions, " +
70-
"and other negative effects.\n" +
71-
"3. Cost: Renewable energy sources can be more expensive to initially set up, but they typically " +
72-
"have lower operational costs than non-renewable sources.\n" +
73-
"4. Reliability: Renewable energy sources are often more reliable and can be used in more remote " +
74-
"locations than non-renewable sources.\n" +
75-
"5. Flexibility: Renewable energy sources are often more flexible and can be adapted to different " +
76-
"situations and needs, while non-renewable sources are more rigid and inflexible.\n" +
77-
"6. Sustainability: Renewable energy sources are more sustainable over the long term, while " +
78-
"non-renewable sources are not, and their depletion can lead to economic and social instability.\n"
79-
]
80-
],
81-
offset: 2,
82-
seps:["###"],
58+
messages: [],
59+
offset: 0,
60+
seps:[" ", "</s>"],
8361
});
8462
};
8563

@@ -307,6 +285,7 @@ class LLMChatPipeline {
307285
callbackUpdateResponse(step, outputPrompt);
308286
}
309287
}
288+
this.conversation.messages[this.conversation.messages.length - 1][1] = outputPrompt;
310289
return outputPrompt;
311290
}
312291

web/local-config.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@
44
"shape": [32, 32, 128],
55
"dtype": "float32"
66
},
7-
"wasmUrl": "dist/vicuna-7b/vicuna-7b_webgpu.wasm",
8-
"cacheUrl": "vicuna-7b-params/",
9-
"tokenizer": "dist/vicuna-7b/tokenizer.model",
7+
"wasmUrl": "dist/vicuna-7b-v1/vicuna-7b-v1_webgpu.wasm",
8+
"cacheUrl": "vicuna-7b-v1-params/",
9+
"tokenizer": "dist/vicuna-7b-v1/tokenizer.model",
1010
"maxGenLength": 512,
1111
"meanGenLength": 128,
1212
"maxWindowLength": 1024

0 commit comments

Comments
 (0)