This repository was archived by the owner on Sep 10, 2025. It is now read-only.

Commit 069d393

Commit message: merge
Parents: 5e0b073 + 019f76f

27 files changed: +1272 -1081 lines

docs/quantization.md

Lines changed: 5 additions & 5 deletions
@@ -142,22 +142,22 @@ To use linear:a8wxdq and embedding:wx, you must set up the torchao experimental
 
 From the torchchat root directory, run
 ```
-sh torchchat/utils/scripts/build_torchao_ops.sh
+bash torchchat/utils/scripts/build_torchao_ops.sh
 ```
 
 This should take about 10 seconds to complete.
 
 Note: if you want to use the new kernels in the AOTI and C++ runners, you must pass the flag link_torchao_ops when running the scripts the build the runners.
 
 ```
-sh torchchat/utils/scripts/build_native.sh aoti link_torchao_ops
+bash torchchat/utils/scripts/build_native.sh aoti link_torchao_ops
 ```
 
 ```
-sh torchchat/utils/scripts/build_native.sh et link_torchao_ops
+bash torchchat/utils/scripts/build_native.sh et link_torchao_ops
 ```
 
-Note before running `sh torchchat/utils/scripts/build_native.sh et link_torchao_ops`, you must first install executorch with `sh torchchat/utils/scripts/install_et.sh` if you have not done so already.
+Note before running `bash torchchat/utils/scripts/build_native.sh et link_torchao_ops`, you must first install executorch with `bash torchchat/utils/scripts/install_et.sh` if you have not done so already.
 
 ### Examples
 
@@ -212,7 +212,7 @@ Currently, torchchat can only run them on Eager mode.
 
 From the torchchat root directory, run
 ```
-sh torchchat/utils/scripts/build_torchao_ops.sh mps
+bash torchchat/utils/scripts/build_torchao_ops.sh mps
 ```
 
 ### Examples
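For reference, these build steps feed the quantized-generation examples later in quantization.md. The invocation below is only an illustrative sketch, not text from this commit: the model alias and the exact keys in the `--quantize` JSON config (`bitwidth`, `groupsize`, `has_weight_zeros`) are assumptions about the torchao scheme parameters.

```
# Hypothetical usage sketch (not from this diff): generate with the torchao
# experimental kernels after running build_torchao_ops.sh. The model alias and
# the quantize JSON keys below are assumed values, shown only for illustration.
python3 torchchat.py generate llama3.1 \
  --quantize '{"embedding:wx": {"bitwidth": 4, "groupsize": 32}, "linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' \
  --prompt "Once upon a time,"
```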

install/.pins/torchao-pin.txt

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-7d7c14e898eca3fe66138d2a9445755a9270b800
+2e032c6b0de960dee554dcb08126ace718b14c6d

install/install_requirements.sh

Lines changed: 14 additions & 17 deletions
@@ -44,37 +44,25 @@ fi
 
 echo "Using pip executable: $PIP_EXECUTABLE"
 
-#
-# First install requirements in install/requirements.txt. Older torch may be
-# installed from the dependency of other models. It will be overridden by
-# newer version of torch nightly installed later in this script.
-#
-
-(
-  set -x
-  $PIP_EXECUTABLE install -r install/requirements.txt --extra-index-url https://download.pytorch.org/whl/nightly/cu121
-)
-
 # Since torchchat often uses main-branch features of pytorch, only the nightly
 # pip versions will have the required features. The PYTORCH_NIGHTLY_VERSION value should
 # agree with the third-party/pytorch pinned submodule commit.
 #
 # NOTE: If a newly-fetched version of the executorch repo changes the value of
 # PYTORCH_NIGHTLY_VERSION, you should re-run this script to install the necessary
 # package versions.
-<<<<<<< HEAD
 if [[ -x "$(command -v xpu-smi)" ]];
 then
-  PYTORCH_NIGHTLY_VERSION=dev20241212
+  PYTORCH_NIGHTLY_VERSION=dev20241217
 else
-  PYTORCH_NIGHTLY_VERSION=dev20241213
+  PYTORCH_NIGHTLY_VERSION=dev20241218
 fi
 
 # Nightly version for torchvision
-VISION_NIGHTLY_VERSION=dev20241213
+VISION_NIGHTLY_VERSION=dev20241218
 
 # Nightly version for torchtune
-TUNE_NIGHTLY_VERSION=dev20241126
+TUNE_NIGHTLY_VERSION=dev20241218
 
 # Uninstall triton, as nightly will depend on pytorch-triton, which is one and the same
 (

@@ -99,7 +87,6 @@ else
 fi
 
 # pip packages needed by exir.
-<<<<<<< HEAD
 if [[ -x "$(command -v xpu-smi)" ]];
 then
   REQUIREMENTS_TO_INSTALL=(

@@ -115,6 +102,16 @@ else
   )
 fi
 
+#
+# First install requirements in install/requirements.txt. Older torch may be
+# installed from the dependency of other models. It will be overridden by
+# newer version of torch nightly installed later in this script.
+#
+(
+  set -x
+  $PIP_EXECUTABLE install -r install/requirements.txt --extra-index-url "${TORCH_NIGHTLY_URL}"
+)
+
 # Install the requirements. --extra-index-url tells pip to look for package
 # versions on the provided URL if they aren't available on the default URL.
 (
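Net effect of this change: the plain install of install/requirements.txt now happens after the nightly pins are defined, and it uses ${TORCH_NIGHTLY_URL} rather than a hard-coded cu121 index. As a rough, hedged sketch of how the dev-date variables above are typically consumed further down the script (the *_BASE_VERSION names are hypothetical placeholders, not values from this commit):

```
# Hedged sketch only: assumed shape of the later nightly install step.
# TORCH_BASE_VERSION, VISION_BASE_VERSION and TUNE_BASE_VERSION are made-up
# placeholders standing in for whatever base versions the real script pins.
REQUIREMENTS_TO_INSTALL=(
  "torch==${TORCH_BASE_VERSION}.${PYTORCH_NIGHTLY_VERSION}"
  "torchvision==${VISION_BASE_VERSION}.${VISION_NIGHTLY_VERSION}"
  "torchtune==${TUNE_BASE_VERSION}.${TUNE_NIGHTLY_VERSION}"
)
(
  set -x
  $PIP_EXECUTABLE install --extra-index-url "${TORCH_NIGHTLY_URL}" "${REQUIREMENTS_TO_INSTALL[@]}"
)
```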

install/requirements.txt

Lines changed: 4 additions & 0 deletions
@@ -9,6 +9,10 @@ gguf
 # Tiktoken tokenizer for Llama 3 and other advanced models
 tiktoken
 
+# Tokenizers and jinja2 for other non-llama models that use HF tokenizers
+tokenizers
+jinja2
+
 # Miscellaneous
 snakeviz
 sentencepiece

tests/conftest.py

Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
+"""
+Global pytest config, fixtures, and helpers go here!
+"""
+
+# Standard
+import os
+import sys
+
+# Make sure tests can import torchchat
+sys.path.append(
+    os.path.realpath(os.path.join(os.path.dirname(__file__), ".."))
+)

tests/test_chat_formatters.py

Lines changed: 216 additions & 0 deletions
@@ -0,0 +1,216 @@
+"""
+Unit tests for chat formatters
+"""
+
+# Third Party
+import pytest
+
+# Local
+from torchchat.generate import (
+    HFTokenizerChatFormatter,
+    Llama2ChatFormatter,
+    Llama3ChatFormatter,
+)
+
+## Helpers #####################################################################
+
+class DummyTokenizer:
+    """Dummy tokenizer that encodes as strings so it's easy to check formatting"""
+    def encode(self, text, *_, **__):
+        return text
+
+
+class DummySPTokenizer(DummyTokenizer):
+    """Emulated Sentencepiece tokenizer with bos/eos"""
+    bos = "<s>"
+    eos = "</s>"
+
+
+class DummyLlama3Tokenizer(DummyTokenizer):
+    class _IdentityDict:
+        def __getitem__(self, key):
+            return key
+    special_tokens = _IdentityDict()
+
+
+class DummyHFTokenizer(DummyTokenizer):
+    """Dummy made up chat template scheme"""
+    # Sequence
+    bos = "<bos>"
+    # Turn
+    bot = "<bot>"
+    eot = "<eot>"
+    # Role
+    bor = "<bor>"
+    eor = "<eor>"
+    def apply_chat_template(self, messages, add_generation_prompt):
+        out = [self.bos]
+        role = None
+        for msg in messages:
+            role = msg["role"]
+            content = msg["content"]
+            out.append(f"{self.bot}{self.bor}{role}{self.eor}{content}{self.eot}")
+        if add_generation_prompt and role != "assistant":
+            out.append(f"{self.bot}{self.bor}assistant{self.eor}")
+        return "\n".join(out)
+
+
+def check_rendering(fmt, messages, expected, add_generation_prompt):
+    """Render messages and compare to expected output"""
+    assert "".join(fmt.encode_dialog_prompt(messages, add_generation_prompt)) == expected
+
+
+def make_message(role, text):
+    return {"role": role, "content": text}
+
+
+SYSTEM_PROMPT = "You are a helpful assistant, feel free to ask me anything."
+USER1 = "Hello world!"
+ASSISTANT1 = "Greetings! How can I help you?"
+USER2 = "Why is the sky blue?"
+ASSISTANT2 = "The sky appears blue because of a phenomenon called Rayleigh scattering."
+
+
+# Stock sets of messages to test
+MSGS_NO_SYS= [
+    make_message("user", USER1),
+]
+MSGS_SYS_USR = [
+    make_message("system", SYSTEM_PROMPT),
+    make_message("user", USER1),
+]
+MSGS_SYS_USR_ASST = [
+    make_message("system", SYSTEM_PROMPT),
+    make_message("user", USER1),
+    make_message("assistant", ASSISTANT1),
+]
+MSGS_MULTI_TURN = [
+    make_message("system", SYSTEM_PROMPT),
+    make_message("user", USER1),
+    make_message("assistant", ASSISTANT1),
+    make_message("user", USER2),
+    make_message("assistant", ASSISTANT2),
+]
+
+## Llama2ChatFormatter #########################################################
+
+@pytest.mark.parametrize(
+    ["messages", "expected"],
+    [
+        # single user message (no system prompt)
+        (MSGS_NO_SYS, f"<s>[INST] {USER1} [/INST]"),
+        # sys, usr
+        (MSGS_SYS_USR, f"""<s>[INST] <<SYS>>
+{SYSTEM_PROMPT}
+<</SYS>>
+
+{USER1} [/INST]"""),
+        # sys, usr, asst
+        (MSGS_SYS_USR_ASST, f"""<s>[INST] <<SYS>>
+{SYSTEM_PROMPT}
+<</SYS>>
+
+{USER1} [/INST] {ASSISTANT1} </s>
+"""),
+        # sys, usr, asst, usr, asst
+        (MSGS_MULTI_TURN, f"""<s>[INST] <<SYS>>
+{SYSTEM_PROMPT}
+<</SYS>>
+
+{USER1} [/INST] {ASSISTANT1} </s>
+<s>[INST] {USER2} [/INST] {ASSISTANT2} </s>
+"""),
+    ]
+)
+def test_llama2_chat_formatter(messages, expected):
+    """Tests for Llama2 following the official guide
+    https://www.llama.com/docs/model-cards-and-prompt-formats/meta-llama-2/
+    """
+    tok = DummySPTokenizer()
+    fmt = Llama2ChatFormatter(tok)
+    # NOTE: add_generation_prompt not used by Llama2
+    check_rendering(fmt, messages, expected, True)
+
+## Llama3ChatFormatter #########################################################
+
+@pytest.mark.parametrize(
+    ["messages", "expected"],
+    [
+        # single user message (no system prompt)
+        (MSGS_NO_SYS, f"""<|begin_of_text|><|start_header_id|>user<|end_header_id|>
+
+{USER1}<|eot_id|>"""),
+        # sys, usr
+        (MSGS_SYS_USR, f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
+
+{SYSTEM_PROMPT}<|eot_id|><|start_header_id|>user<|end_header_id|>
+
+{USER1}<|eot_id|>"""),
+        # sys, usr, asst
+        (MSGS_SYS_USR_ASST, f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
+
+{SYSTEM_PROMPT}<|eot_id|><|start_header_id|>user<|end_header_id|>
+
+{USER1}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
+{ASSISTANT1}<|eot_id|>"""),
+        # sys, usr, asst, usr, asst
+        (MSGS_MULTI_TURN, f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
+
+{SYSTEM_PROMPT}<|eot_id|><|start_header_id|>user<|end_header_id|>
+
+{USER1}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
+{ASSISTANT1}<|eot_id|><|start_header_id|>user<|end_header_id|>
+
+{USER2}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
+{ASSISTANT2}<|eot_id|>"""),
+    ]
+)
+@pytest.mark.parametrize("add_generation_prompt", [True, False])
+def test_llama3_chat_formatter(messages, expected, add_generation_prompt):
+    """Tests for Llama3 following the official guide
+    https://www.llama.com/docs/model-cards-and-prompt-formats/meta-llama-3/
+    """
+    tok = DummyLlama3Tokenizer()
+    fmt = Llama3ChatFormatter(tok)
+    # No assistant prompt added if the last message is from the assistant
+    if add_generation_prompt and messages[-1]["role"] != "assistant":
+        expected += "<|start_header_id|>assistant<|end_header_id|>\n\n"
+    check_rendering(fmt, messages, expected, add_generation_prompt)
+
+## HFTokenizerChatFormatter ####################################################
+
+@pytest.mark.parametrize(
+    ["messages", "expected"],
+    [
+        # single user message (no system prompt)
+        (MSGS_NO_SYS, f"""<bos>
+<bot><bor>user<eor>{USER1}<eot>"""),
+        # sys, usr
+        (MSGS_SYS_USR, f"""<bos>
+<bot><bor>system<eor>{SYSTEM_PROMPT}<eot>
+<bot><bor>user<eor>{USER1}<eot>"""),
+        # sys, usr, asst
+        (MSGS_SYS_USR_ASST, f"""<bos>
+<bot><bor>system<eor>{SYSTEM_PROMPT}<eot>
+<bot><bor>user<eor>{USER1}<eot>
+<bot><bor>assistant<eor>{ASSISTANT1}<eot>"""),
+        # sys, usr, asst, usr, asst
+        (MSGS_MULTI_TURN, f"""<bos>
+<bot><bor>system<eor>{SYSTEM_PROMPT}<eot>
+<bot><bor>user<eor>{USER1}<eot>
+<bot><bor>assistant<eor>{ASSISTANT1}<eot>
+<bot><bor>user<eor>{USER2}<eot>
+<bot><bor>assistant<eor>{ASSISTANT2}<eot>"""),
+    ]
+)
+@pytest.mark.parametrize("add_generation_prompt", [True, False])
+def test_hf_chat_formatter(messages, expected, add_generation_prompt):
+    tok = DummyHFTokenizer()
+    fmt = HFTokenizerChatFormatter(tok)
+    # No assistant prompt added if the last message is from the assistant
+    if add_generation_prompt and messages[-1]["role"] != "assistant":
+        expected += f"\n{tok.bot}{tok.bor}assistant{tok.eor}"
+    check_rendering(fmt, messages, expected, add_generation_prompt)
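The new tests/conftest.py puts the repository root on sys.path, so these tests can be run directly from a torchchat checkout with a standard pytest invocation, for example:

```
# Run the new chat-formatter tests from the torchchat root directory
python -m pytest tests/test_chat_formatters.py -v
```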
