@@ -196,7 +196,7 @@
 "### **1.1 - What is Llama 3?**\n",
 "\n",
 "* State of the art (SOTA), Open Source LLM\n",
-"* 8B, 70B\n",
+"* 8B, 70B - base and instruct models\n",
 "* Choosing model: Size, Quality, Cost, Speed\n",
 "* Pretrained + Chat\n",
 "* [Meta Llama 3 Blog](https://ai.meta.com/blog/meta-llama-3/)\n",
@@ -275,9 +275,7 @@
 "source": [
 "## **2 - Using and Comparing Llama 3 and Llama 2**\n",
 "\n",
-"In this notebook, we will use the Llama 2 70b chat and Llama 3 8b and 70b instruct models hosted on [Groq](https://console.groq.com/). You'll need to first [sign in](https://console.groq.com/) with your github or gmail account, then get an [API token](https://console.groq.com/keys) to try Groq out for free. (Groq runs Llama models very fast and they only support one Llama 2 model: the Llama 2 70b chat).\n",
-"\n",
-"**Note: You can also use other Llama hosting providers such as [Replicate](https://replicate.com/blog/run-llama-3-with-an-api?input=python), [Togther](https://docs.together.ai/docs/quickstart). Simply click the links here to see how to run `pip install` and use their freel trial API key with example code to modify the following three cells in 2.1 and 2.2.**\n"
+"We will be using the Llama 2 7b & 70b chat and Llama 3 8b & 70b instruct models hosted on [Replicate](https://replicate.com/search?query=llama) to run the examples here. You will need to first sign in to Replicate with your GitHub account, then create a free API token [here](https://replicate.com/account/api-tokens) that you can use for a while. You can also use other Llama 3 cloud providers such as [Groq](https://console.groq.com/), [Together](https://api.together.xyz/playground/language/meta-llama/Llama-3-8b-hf), or [Anyscale](https://app.endpoints.anyscale.com/playground).\n"
 ]
 },
 {
@@ -297,15 +295,15 @@
 },
 "outputs": [],
 "source": [
-"!pip install groq"
+"!pip install replicate"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
 "### **2.2 - Create helpers for Llama 2 and Llama 3**\n",
-"First, set your Groq API token as environment variables.\n"
+"First, set your Replicate API token as an environment variable.\n"
 ]
 },
 {
@@ -319,16 +317,16 @@
 "import os\n",
 "from getpass import getpass\n",
 "\n",
-"GROQ_API_TOKEN = getpass()\n",
+"REPLICATE_API_TOKEN = getpass()\n",
 "\n",
-"os.environ[\"GROQ_API_KEY\"] = GROQ_API_TOKEN"
+"os.environ[\"REPLICATE_API_TOKEN\"] = REPLICATE_API_TOKEN"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"Create Llama 2 and Llama 3 helper functions - for chatbot type of apps, we'll use Llama 3 8b/70b instruct models, not the base models."
+"Create Llama 2 and Llama 3 helper functions - for chatbot-type apps, we'll use the Llama 3 instruct and Llama 2 chat models, not the base models."
 ]
 },
 {
@@ -339,53 +337,35 @@
 },
 "outputs": [],
 "source": [
-"from groq import Groq\n",
-"\n",
-"client = Groq(\n",
-"    api_key=os.environ.get(\"GROQ_API_KEY\"),\n",
-")\n",
+"import replicate\n",
 "\n",
-"def llama2(prompt, temperature=0.0, input_print=True):\n",
-"    chat_completion = client.chat.completions.create(\n",
-"        messages=[\n",
-"            {\n",
-"                \"role\": \"user\",\n",
-"                \"content\": prompt,\n",
-"            }\n",
-"        ],\n",
-"        model=\"llama2-70b-4096\",\n",
-"        temperature=temperature,\n",
-"    )\n",
+"def llama2_7b(prompt):\n",
+"    output = replicate.run(\n",
+"        \"meta/llama-2-7b-chat\",\n",
+"        input={\"prompt\": prompt}\n",
+"    )\n",
+"    return ''.join(output)\n",
 "\n",
-"    return (chat_completion.choices[0].message.content)\n",
+"def llama2_70b(prompt):\n",
+"    output = replicate.run(\n",
+"        \"meta/llama-2-70b-chat\",\n",
+"        input={\"prompt\": prompt}\n",
+"    )\n",
+"    return ''.join(output)\n",
 "\n",
-"def llama3_8b(prompt, temperature=0.0, input_print=True):\n",
-"    chat_completion = client.chat.completions.create(\n",
-"        messages=[\n",
-"            {\n",
-"                \"role\": \"user\",\n",
-"                \"content\": prompt,\n",
-"            }\n",
-"        ],\n",
-"        model=\"llama3-8b-8192\",\n",
-"        temperature=temperature,\n",
-"    )\n",
+"def llama3_8b(prompt):\n",
+"    output = replicate.run(\n",
+"        \"meta/meta-llama-3-8b-instruct\",\n",
+"        input={\"prompt\": prompt}\n",
+"    )\n",
+"    return ''.join(output)\n",
 "\n",
-"    return (chat_completion.choices[0].message.content)\n",
-"\n",
-"def llama3_70b(prompt, temperature=0.0, input_print=True):\n",
-"    chat_completion = client.chat.completions.create(\n",
-"        messages=[\n",
-"            {\n",
-"                \"role\": \"user\",\n",
-"                \"content\": prompt,\n",
-"            }\n",
-"        ],\n",
-"        model=\"llama3-70b-8192\",\n",
-"        temperature=temperature,\n",
-"    )\n",
-"\n",
-"    return (chat_completion.choices[0].message.content)"
+"def llama3_70b(prompt):\n",
+"    output = replicate.run(\n",
+"        \"meta/meta-llama-3-70b-instruct\",\n",
+"        input={\"prompt\": prompt}\n",
+"    )\n",
+"    return ''.join(output)"
 ]
 },
 {
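The four new helpers in the hunk above differ only in the model id. As an aside, they could be collapsed into one parameterized function; the sketch below is hypothetical (the `runner` parameter is an illustrative injection point added here for offline testing, not part of the notebook), assuming `replicate.run` keeps returning an iterator of text chunks that can be joined:

```python
# Hypothetical refactor of the four near-identical helpers above.
# Model ids are the ones used in the notebook; `runner` lets the
# function be exercised without a Replicate API token.
LLAMA_MODELS = {
    "llama2-7b": "meta/llama-2-7b-chat",
    "llama2-70b": "meta/llama-2-70b-chat",
    "llama3-8b": "meta/meta-llama-3-8b-instruct",
    "llama3-70b": "meta/meta-llama-3-70b-instruct",
}

def run_llama(model_key, prompt, runner=None):
    if runner is None:
        import replicate  # deferred so the package is only needed for real calls
        runner = replicate.run
    # replicate.run streams back chunks of generated text; join them into one string
    output = runner(LLAMA_MODELS[model_key], input={"prompt": prompt})
    return ''.join(output)
```

A call like `run_llama("llama3-8b", prompt)` would then replace `llama3_8b(prompt)` in the cells below.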
@@ -406,7 +386,7 @@
 "outputs": [],
 "source": [
 "prompt = \"The typical color of a llama is: \"\n",
-"output = llama2(prompt)\n",
+"output = llama2_7b(prompt)\n",
 "md(output)"
 ]
 },
@@ -420,6 +400,16 @@
 "md(output)"
 ]
 },
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"output = llama2_7b(\"The typical color of a llama is what? Answer in one word.\")\n",
+"md(output)"
+]
+},
 {
 "cell_type": "code",
 "execution_count": null,
@@ -430,6 +420,13 @@
 "md(output)"
 ]
 },
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"**Note: Llama 3 follows instructions better than Llama 2 in single-turn chat.**"
+]
+},
 {
 "cell_type": "markdown",
 "metadata": {
@@ -457,7 +454,7 @@
 "outputs": [],
 "source": [
 "prompt_chat = \"What is the average lifespan of a Llama? Answer the question in few words.\"\n",
-"output = llama2(prompt_chat)\n",
+"output = llama2_7b(prompt_chat)\n",
 "md(output)"
 ]
 },
|
484 | 481 | "# example without previous context. LLM's are stateless and cannot understand \"they\" without previous context\n",
|
485 | 482 | "prompt_chat = \"What animal family are they? Answer the question in few words.\"\n",
|
486 |
| - "output = llama2(prompt_chat)\n", |
| 483 | + "output = llama2_7b(prompt_chat)\n", |
487 | 484 | "md(output)"
|
488 | 485 | ]
|
489 | 486 | },
|
|
@@ -497,6 +494,16 @@
 "md(output)"
 ]
 },
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"output = llama2_70b(prompt_chat)\n",
+"md(output)"
+]
+},
 {
 "cell_type": "code",
 "execution_count": null,
@@ -536,7 +543,7 @@
 "Assistant: 15-20 years.\n",
 "User: What animal family are they?\n",
 "\"\"\"\n",
-"output = llama2(prompt_chat)\n",
+"output = llama2_7b(prompt_chat)\n",
 "md(output)"
 ]
 },
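The prompt in the hunk above embeds prior turns directly as a `User:`/`Assistant:` transcript. A small helper along these lines (hypothetical, not in the notebook) could assemble that format from a list of prior turns:

```python
def build_chat_prompt(turns, question):
    """Format prior (user, assistant) turns plus a new question in the
    plain User:/Assistant: transcript style used in the notebook's prompts."""
    lines = []
    for user, assistant in turns:
        lines.append(f"User: {user}")
        lines.append(f"Assistant: {assistant}")
    lines.append(f"User: {question}")
    return "\n".join(lines) + "\n"
```

For example, `build_chat_prompt([("What is the average lifespan of a Llama?", "15-20 years.")], "What animal family are they?")` reproduces the multi-turn prompt shown above.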
@@ -579,7 +586,17 @@
 "\n",
 "Answer the question with one word.\n",
 "\"\"\"\n",
-"output = llama2(prompt_chat)\n",
+"output = llama2_7b(prompt_chat)\n",
+"md(output)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"output = llama2_70b(prompt_chat)\n",
 "md(output)"
 ]
 },
@@ -597,7 +614,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"**Both Llama 3 8b and Llama 2 70b follows instructions (e.g. \"Answer the question with one word\") better than Llama 2 7b.**"
+"**Both Llama 3 8b and Llama 2 70b follow instructions (e.g. \"Answer the question with one word\") better than Llama 2 7b in multi-turn chat.**"
 ]
 },
 {
@@ -640,7 +657,7 @@
 "\n",
 "Give one word response.\n",
 "'''\n",
-"output = llama2(prompt)\n",
+"output = llama2_7b(prompt)\n",
 "md(output)"
 ]
 },
@@ -684,7 +701,7 @@
 "Give one word response.\n",
 "'''\n",
 "\n",
-"output = llama2(prompt)\n",
+"output = llama2_7b(prompt)\n",
 "md(output)"
 ]
 },
@@ -704,7 +721,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"**Note: Llama 2, with few shots, has the same output \"Neutral\" as Llama 3.**"
+"**Note: With few-shot examples, Llama 2 produces the same output \"Neutral\" as Llama 3, but Llama 2 doesn't follow the instruction (\"Give one word response\") as well.**"
 ]
 },
 {
@@ -894,6 +911,7 @@
 "outputs": [],
 "source": [
 "!pip install langchain\n",
+"!pip install langchain-community\n",
 "!pip install sentence-transformers\n",
 "!pip install faiss-cpu\n",
 "!pip install bs4\n",
@@ -936,40 +954,53 @@
 "vectorstore = FAISS.from_documents(all_splits, HuggingFaceEmbeddings(model_name=\"sentence-transformers/all-mpnet-base-v2\"))"
 ]
 },
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"You'll need to first sign in at [Groq](https://console.groq.com/login) with your GitHub or Gmail account, then get an API token to try Groq out for free."
+]
+},
 {
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain_groq import ChatGroq\n",
-"llm = ChatGroq(temperature=0, model_name=\"llama3-8b-8192\")\n",
+"import os\n",
+"from getpass import getpass\n",
 "\n",
-"from langchain.chains import ConversationalRetrievalChain\n",
-"chain = ConversationalRetrievalChain.from_llm(llm,\n",
-"                                              vectorstore.as_retriever(),\n",
-"                                              return_source_documents=True)\n",
+"GROQ_API_TOKEN = getpass()\n",
 "\n",
-"result = chain({\"question\": \"What’s new with Llama 3?\", \"chat_history\": []})\n",
-"md(result['answer'])\n"
+"os.environ[\"GROQ_API_KEY\"] = GROQ_API_TOKEN"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {
-"id": "NmEhBe3Kiyre"
-},
+"metadata": {},
+"outputs": [],
+"source": [
+"from langchain_groq import ChatGroq\n",
+"llm = ChatGroq(temperature=0, model_name=\"llama3-8b-8192\")"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
 "outputs": [],
 "source": [
-"# Query against your own data\n",
 "from langchain.chains import ConversationalRetrievalChain\n",
-"chain = ConversationalRetrievalChain.from_llm(llm, vectorstore.as_retriever(), return_source_documents=True)\n",
 "\n",
-"chat_history = []\n",
-"query = \"What’s new with Llama 3?\"\n",
-"result = chain({\"question\": query, \"chat_history\": chat_history})\n",
-"md(result['answer'])"
+"# Query against your own data\n",
+"chain = ConversationalRetrievalChain.from_llm(llm,\n",
+"                                              vectorstore.as_retriever(),\n",
+"                                              return_source_documents=True)\n",
+"\n",
+"# no chat history passed\n",
+"result = chain({\"question\": \"What’s new with Llama 3?\", \"chat_history\": []})\n",
+"md(result['answer'])\n"
 ]
 },
 {
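The added cell above deliberately passes an empty `chat_history`. For follow-up questions, `ConversationalRetrievalChain` expects the prior turns as (question, answer) pairs; a small hypothetical wrapper (not part of the notebook) that threads the history through repeated calls might look like:

```python
def ask(chain, question, chat_history):
    # ConversationalRetrievalChain-style call: prior turns are passed in as
    # (question, answer) pairs, and each new exchange is appended afterwards.
    result = chain({"question": question, "chat_history": chat_history})
    chat_history.append((question, result["answer"]))
    return result["answer"]
```

Each call then reuses the accumulated history, so a pronoun like "they" in a follow-up question can be resolved against the earlier exchange.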
@@ -1083,7 +1114,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.11.7"
+"version": "3.10.14"
 }
 },
 "nbformat": 4,