|
299 | 299 | },
|
300 | 300 | {
|
301 | 301 | "cell_type": "code",
|
302 |
| - "execution_count": 2, |
| 302 | + "execution_count": 12, |
303 | 303 | "metadata": {},
|
304 | 304 | "outputs": [],
|
305 | 305 | "source": [
|
|
312 | 312 | "source": [
|
313 | 313 | "#### Now we can learn about the various prompt formats available \n",
|
314 | 314 | "\n",
|
315 |
| - "When you run the cell below-you will see all the options for different template" |
| 315 | + "When you run the cell below, you will see the models available, and then we can check the details of each model-specific prompt format" |
316 | 316 | ]
|
317 | 317 | },
|
318 | 318 | {
|
319 | 319 | "cell_type": "code",
|
320 |
| - "execution_count": 90, |
| 320 | + "execution_count": 20, |
321 | 321 | "metadata": {},
|
322 | 322 | "outputs": [
|
323 | 323 | {
|
324 | 324 | "name": "stdout",
|
325 | 325 | "output_type": "stream",
|
326 | 326 | "text": [
|
327 |
| - "+-----------+---------------------------------+\n", |
328 |
| - "\u001b[1m\u001b[97m| Role | Template Name |\u001b[0m\n", |
329 |
| - "+-----------+---------------------------------+\n", |
330 |
| - "| user | user-default |\n", |
331 |
| - "| assistant | assistant-builtin-tool-call |\n", |
332 |
| - "| assistant | assistant-custom-tool-call |\n", |
333 |
| - "| assistant | assistant-default |\n", |
334 |
| - "| system | system-builtin-and-custom-tools |\n", |
335 |
| - "| system | system-builtin-tools-only |\n", |
336 |
| - "| system | system-custom-tools-only |\n", |
337 |
| - "| system | system-default |\n", |
338 |
| - "| tool | tool-success |\n", |
339 |
| - "| tool | tool-failure |\n", |
340 |
| - "+-----------+---------------------------------+\n" |
| 327 | + "Traceback (most recent call last):\n", |
| 328 | + " File \"/opt/miniconda3/bin/llama\", line 8, in <module>\n", |
| 329 | + " sys.exit(main())\n", |
| 330 | + " ^^^^^^\n", |
| 331 | + " File \"/opt/miniconda3/lib/python3.12/site-packages/llama_toolchain/cli/llama.py\", line 44, in main\n", |
| 332 | + " parser.run(args)\n", |
| 333 | + " File \"/opt/miniconda3/lib/python3.12/site-packages/llama_toolchain/cli/llama.py\", line 38, in run\n", |
| 334 | + " args.func(args)\n", |
| 335 | + " File \"/opt/miniconda3/lib/python3.12/site-packages/llama_toolchain/cli/model/prompt_format.py\", line 59, in _run_model_template_cmd\n", |
| 336 | + " raise argparse.ArgumentTypeError(\n", |
| 337 | + "argparse.ArgumentTypeError: llama3_1 is not a valid Model. Choose one from --\n", |
| 338 | + "Llama3.1-8B\n", |
| 339 | + "Llama3.1-70B\n", |
| 340 | + "Llama3.1-405B\n", |
| 341 | + "Llama3.1-8B-Instruct\n", |
| 342 | + "Llama3.1-70B-Instruct\n", |
| 343 | + "Llama3.1-405B-Instruct\n", |
| 344 | + "Llama3.2-1B\n", |
| 345 | + "Llama3.2-3B\n", |
| 346 | + "Llama3.2-1B-Instruct\n", |
| 347 | + "Llama3.2-3B-Instruct\n", |
| 348 | + "Llama3.2-11B-Vision\n", |
| 349 | + "Llama3.2-90B-Vision\n", |
| 350 | + "Llama3.2-11B-Vision-Instruct\n", |
| 351 | + "Llama3.2-90B-Vision-Instruct\n" |
341 | 352 | ]
|
342 | 353 | }
|
343 | 354 | ],
|
344 | 355 | "source": [
|
345 |
| - "!llama model template" |
346 |
| - ] |
347 |
| - }, |
348 |
| - { |
349 |
| - "cell_type": "markdown", |
350 |
| - "metadata": {}, |
351 |
| - "source": [ |
352 |
| - "## Tool Calling: Using the correct Prompt Template\n", |
353 |
| - "\n", |
354 |
| - "With `llama-cli` we can learn the correct way of defining `System_prompt` and finally get the correct behaviour from the model" |
| 356 | + "!llama model prompt-format " |
355 | 357 | ]
|
356 | 358 | },
|
357 | 359 | {
|
358 | 360 | "cell_type": "code",
|
359 |
| - "execution_count": 92, |
| 361 | + "execution_count": 21, |
360 | 362 | "metadata": {},
|
361 | 363 | "outputs": [
|
362 | 364 | {
|
363 | 365 | "name": "stdout",
|
364 | 366 | "output_type": "stream",
|
365 | 367 | "text": [
|
366 |
| - "+----------+--------------------------------------------------------------+\n", |
367 |
| - "| Name | \u001b[1m\u001b[97msystem-builtin-tools-only\u001b[0m |\n", |
368 |
| - "+----------+--------------------------------------------------------------+\n", |
369 |
| - "| Template | \u001b[1m\u001b[33m<|begin_of_text|>\u001b[0m\u001b[1m\u001b[33m<|start_header_id|>\u001b[0msystem\u001b[1m\u001b[33m<|end_header_id|>\u001b[0m↵ |\n", |
370 |
| - "| | ↵ |\n", |
371 |
| - "| | Environment: ipython↵ |\n", |
372 |
| - "| | Tools: brave_search, wolfram_alpha↵ |\n", |
373 |
| - "| | Cutting Knowledge Date: December 2023↵ |\n", |
374 |
| - "| | Today Date: 15 September 2024↵ |\n", |
375 |
| - "| | ↵ |\n", |
376 |
| - "| | You are a helpful assistant.↵ |\n", |
377 |
| - "| | \u001b[1m\u001b[33m<|eot_id|>\u001b[0m\u001b[1m\u001b[33m<|start_header_id|>\u001b[0massistant\u001b[1m\u001b[33m<|end_header_id|>\u001b[0m↵ |\n", |
378 |
| - "| | ↵ |\n", |
379 |
| - "| | |\n", |
380 |
| - "+----------+--------------------------------------------------------------+\n", |
381 |
| - "| Notes | ↵ represents newline |\n", |
382 |
| - "+----------+--------------------------------------------------------------+\n" |
| 368 | + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[m━━━━━━━━━━━━━━━━━━━┓\u001b[m\n", |
| 369 | + "┃ \u001b[1mLlama 3.1 - Prompt Formats\u001b[0m \u001b[m\u001b[1m\u001b[0m ┃\u001b[m\n", |
| 370 | + "┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[m━━━━━━━━━━━━━━━━━━━┛\u001b[m\n", |
| 371 | + "\u001b[m\n", |
| 372 | + "\u001b[m\n", |
| 373 | + " \u001b[1;4mTokens\u001b[0m \u001b[m\u001b[1;4m\u001b[0m \u001b[m\n", |
| 374 | + "\u001b[m\n", |
| 375 | + "Here is a list of special tokens that are supported by Llama 3.1: \u001b[m \u001b[m\n", |
| 376 | + "\u001b[m\n", |
| 377 | + "\u001b[1;33m • \u001b[0m\u001b[1;36;40m<|begin_of_text|>\u001b[0m: Specifies the start of the prompt \u001b[m\u001b[1;33m\u001b[0m\u001b[1;36;40m\u001b[0m \u001b[m\n", |
| 378 | + "\u001b[1;33m • \u001b[0m\u001b[1;36;40m<|end_of_text|>\u001b[0m: Model will cease to generate more tokens. This token is gene\u001b[m\u001b[1;33m\u001b[0m\u001b[1;36;40m\u001b[0mrated only by the \u001b[m\n", |
| 379 | + "\u001b[1;33m \u001b[0mbase models. \u001b[m\u001b[1;33m\u001b[0m \u001b[m\n", |
| 380 | + "\u001b[1;33m • \u001b[0m\u001b[1;36;40m<|finetune_right_pad_id|>\u001b[0m: This token is used for padding text sequences to t\u001b[m\u001b[1;33m\u001b[0m\u001b[1;36;40m\u001b[0mhe same length in a \u001b[m\n", |
| 381 | + "\u001b[1;33m \u001b[0mbatch. \u001b[m:\u001b[K" |
383 | 382 | ]
|
384 | 383 | }
|
385 | 384 | ],
|
386 | 385 | "source": [
|
387 |
| - "!llama model template --name system-builtin-tools-only" |
| 386 | + "!llama model prompt-format -m Llama3.1-8B" |
388 | 387 | ]
|
389 | 388 | },
|
390 | 389 | {
|
391 | 390 | "cell_type": "markdown",
|
392 | 391 | "metadata": {},
|
393 | 392 | "source": [
|
394 |
| - "If everything is setup correctly-the model should now wrap function calls with the `|<python_tag>|` following the actualy function call. \n", |
395 |
| - "\n", |
396 |
| - "This can allow you to manage your function calling logic accordingly. \n", |
| 393 | + "## Tool Calling: Using the correct Prompt Template\n", |
397 | 394 | "\n",
|
398 |
| - "Time to test the theory" |
| 395 | + "With the `llama` CLI we have already seen the correct prompt format, so we can now define the system prompt and get the correct behaviour from the model" |
399 | 396 | ]
|
400 | 397 | },
|
401 | 398 | {
|
402 |
| - "cell_type": "code", |
403 |
| - "execution_count": 94, |
| 399 | + "cell_type": "markdown", |
404 | 400 | "metadata": {},
|
405 |
| - "outputs": [ |
406 |
| - { |
407 |
| - "name": "stdout", |
408 |
| - "output_type": "stream", |
409 |
| - "text": [ |
410 |
| - "+----------+----------------------------------------------------------------------------------+\n", |
411 |
| - "| Name | \u001b[1m\u001b[97massistant-builtin-tool-call\u001b[0m |\n", |
412 |
| - "+----------+----------------------------------------------------------------------------------+\n", |
413 |
| - "| Template | \u001b[1m\u001b[33m<|begin_of_text|>\u001b[0m\u001b[1m\u001b[33m<|start_header_id|>\u001b[0massistant\u001b[1m\u001b[33m<|end_header_id|>\u001b[0m↵ |\n", |
414 |
| - "| | ↵ |\n", |
415 |
| - "| | \u001b[1m\u001b[33m<|python_tag|>\u001b[0mbrave_search.call(query=\"Who won NBA in |\n", |
416 |
| - "| | 2024?\")\u001b[1m\u001b[33m<|eom_id|>\u001b[0m\u001b[1m\u001b[33m<|start_header_id|>\u001b[0massistant\u001b[1m\u001b[33m<|end_header_id|>\u001b[0m↵ |\n", |
417 |
| - "| | ↵ |\n", |
418 |
| - "| | |\n", |
419 |
| - "+----------+----------------------------------------------------------------------------------+\n", |
420 |
| - "| Notes | ↵ represents newline |\n", |
421 |
| - "| | Notice <|python_tag|> |\n", |
422 |
| - "+----------+----------------------------------------------------------------------------------+\n" |
423 |
| - ] |
424 |
| - } |
425 |
| - ], |
426 | 401 | "source": [
|
427 |
| - "!llama model template --name assistant-builtin-tool-call" |
| 402 | + "If everything is set up correctly, the model should now prefix function calls with the `<|python_tag|>` special token, followed by the actual function call. \n", |
| 403 | + "\n", |
| 404 | + "This allows you to manage your function-calling logic accordingly. \n", |
| 405 | + "\n", |
| 406 | + "Time to test the theory" |
428 | 407 | ]
|
429 | 408 | },
|
430 | 409 | {
|
|
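The system prompt that enables the builtin tools can also be assembled by hand. Below is a minimal sketch, not part of the notebook diff above, assuming you build the raw prompt string yourself rather than using a chat-template helper; the function name `build_system_prompt` and its default values are illustrative placeholders that mirror the special-token layout printed by `llama model prompt-format -m Llama3.1-8B`.

```python
# Minimal sketch (not part of this PR): hand-assembling the Llama 3.1
# builtin-tools system prompt, mirroring the special-token layout shown by
# `llama model prompt-format -m Llama3.1-8B`. Names and defaults are placeholders.
def build_system_prompt(tools: str = "brave_search, wolfram_alpha",
                        today: str = "15 September 2024",
                        instructions: str = "You are a helpful assistant.") -> str:
    return (
        "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n"
        "\n"
        "Environment: ipython\n"
        f"Tools: {tools}\n"
        "Cutting Knowledge Date: December 2023\n"
        f"Today Date: {today}\n"
        "\n"
        f"{instructions}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n"
        "\n"
    )

print(build_system_prompt())
```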
684 | 663 | "\n",
|
685 | 664 | "[Here](https://www.llama.com/docs/model-cards-and-prompt-formats/llama3_2#-tool-calling-(1b/3b)-) are the docs for your reference that we will be using. \n",
|
686 | 665 | "\n",
|
687 |
| - "Let's verify the details from `llama-toolchain` again and then start the prompt engineering for the small Llamas." |
688 |
| - ] |
689 |
| - }, |
690 |
| - { |
691 |
| - "cell_type": "code", |
692 |
| - "execution_count": 18, |
693 |
| - "metadata": {}, |
694 |
| - "outputs": [ |
695 |
| - { |
696 |
| - "name": "stdout", |
697 |
| - "output_type": "stream", |
698 |
| - "text": [ |
699 |
| - "Traceback (most recent call last):\n", |
700 |
| - " File \"/opt/miniconda3/bin/llama\", line 8, in <module>\n", |
701 |
| - " sys.exit(main())\n", |
702 |
| - " ^^^^^^\n", |
703 |
| - " File \"/opt/miniconda3/lib/python3.12/site-packages/llama_toolchain/cli/llama.py\", line 44, in main\n", |
704 |
| - " parser.run(args)\n", |
705 |
| - " File \"/opt/miniconda3/lib/python3.12/site-packages/llama_toolchain/cli/llama.py\", line 38, in run\n", |
706 |
| - " args.func(args)\n", |
707 |
| - " File \"/opt/miniconda3/lib/python3.12/site-packages/llama_toolchain/cli/model/prompt_format.py\", line 59, in _run_model_template_cmd\n", |
708 |
| - " raise argparse.ArgumentTypeError(\n", |
709 |
| - "argparse.ArgumentTypeError: llama3_1 is not a valid Model. Choose one from --\n", |
710 |
| - "Llama3.1-8B\n", |
711 |
| - "Llama3.1-70B\n", |
712 |
| - "Llama3.1-405B\n", |
713 |
| - "Llama3.1-8B-Instruct\n", |
714 |
| - "Llama3.1-70B-Instruct\n", |
715 |
| - "Llama3.1-405B-Instruct\n", |
716 |
| - "Llama3.2-1B\n", |
717 |
| - "Llama3.2-3B\n", |
718 |
| - "Llama3.2-1B-Instruct\n", |
719 |
| - "Llama3.2-3B-Instruct\n", |
720 |
| - "Llama3.2-11B-Vision\n", |
721 |
| - "Llama3.2-90B-Vision\n", |
722 |
| - "Llama3.2-11B-Vision-Instruct\n", |
723 |
| - "Llama3.2-90B-Vision-Instruct\n" |
724 |
| - ] |
725 |
| - } |
726 |
| - ], |
727 |
| - "source": [ |
728 |
| - "!llama model prompt-format" |
| 666 | + "Exercise for the reader: use `llama-toolchain` again to verify the prompt format, as we did earlier, and then start prompt engineering for the small Llamas." |
729 | 667 | ]
|
730 | 668 | },
|
731 | 669 | {
|
|
903 | 841 | "cell_type": "markdown",
|
904 | 842 | "metadata": {},
|
905 | 843 | "source": [
|
906 |
| - "Hello Regex, my good old friend :) " |
| 844 | + "### Handling the tool-calling logic for the model" |
| 845 | + ] |
| 846 | + }, |
| 847 | + { |
| 848 | + "cell_type": "markdown", |
| 849 | + "metadata": {}, |
| 850 | + "source": [ |
| 851 | + "Hello Regex, my good old friend :) \n", |
| 852 | + "\n", |
| 853 | + "With regex, we can write a simple way to handle tool calling and return either the model response or the parsed tool call" |
907 | 854 | ]
|
908 | 855 | },
|
909 | 856 | {
|
|
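The regex handler described in the last markdown cell could look roughly like the sketch below. It is illustrative only and not part of this PR, assuming the model prefixes builtin tool calls with `<|python_tag|>` and uses the `tool.call(...)` syntax shown in the earlier template output; the pattern and function names are placeholders.

```python
import re

# Illustrative sketch (not part of this PR): classify a raw Llama completion as
# either a builtin tool call (prefixed with <|python_tag|>) or a plain message.
TOOL_CALL_PATTERN = re.compile(
    r"<\|python_tag\|>(?P<tool>\w+)\.call\((?P<args>.*?)\)", re.DOTALL
)

def parse_response(raw_output: str):
    """Return ('tool_call', tool, args) for tool calls, else ('message', text)."""
    match = TOOL_CALL_PATTERN.search(raw_output)
    if match:
        return ("tool_call", match.group("tool"), match.group("args"))
    return ("message", raw_output.strip())

# The builtin-tool-call example from the earlier template output:
print(parse_response('<|python_tag|>brave_search.call(query="Who won NBA in 2024?")<|eom_id|>'))
print(parse_response("The Boston Celtics won the 2024 NBA Finals."))
```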