|
5 | 5 | "id": "47a9adb3",
|
6 | 6 | "metadata": {},
|
7 | 7 | "source": [
|
8 |
| - "## This demo app shows how to query Llama 2 using the Gradio UI.\n", |
| 8 | + "## This demo app shows how to query Llama 3 using the Gradio UI.\n", |
9 | 9 | "\n",
|
10 | 10 | "Since we are using OctoAI in this example, you'll need to obtain an OctoAI token:\n",
|
11 | 11 | "\n",
|
12 | 12 | "- You will need to first sign into [OctoAI](https://octoai.cloud/) with your Github or Google account\n",
|
13 | 13 | "- Then create a free API token [here](https://octo.ai/docs/getting-started/how-to-create-an-octoai-access-token) that you can use for a while (a month or $10 in OctoAI credits, whichever one runs out first)\n",
|
14 | 14 | "\n",
|
15 |
| - "**Note** After the free trial ends, you will need to enter billing info to continue to use Llama2 hosted on OctoAI.\n", |
| 15 | + "**Note:** After the free trial ends, you will need to enter billing info to continue to use Llama 3 hosted on OctoAI.\n", |
16 | 16 | "\n",
|
17 | 17 | "To run this example:\n",
|
18 | 18 | "- Run the notebook\n",
|
|
22 | 22 | "In the notebook or a browser with URL http://127.0.0.1:7860 you should see a UI with your answer.\n",
|
23 | 23 | "\n",
|
24 | 24 | "Let's start by installing the necessary packages:\n",
|
25 |
| - "- langchain provides necessary RAG tools for this demo\n", |
26 |
| - "- octoai-sdk allows us to use OctoAI Llama 2 endpoint\n", |
| 25 | + "- openai allows us to use its OpenAI-compatible API client to talk to the OctoAI endpoint\n", |
27 | 26 | "- gradio is used for the UI elements\n",
|
28 | 27 | "\n",
|
29 | 28 | "And setting up the OctoAI token."
|
|
36 | 35 | "metadata": {},
|
37 | 36 | "outputs": [],
|
38 | 37 | "source": [
|
39 |
| - "!pip install langchain octoai-sdk gradio" |
| 38 | + "!pip install openai gradio" |
40 | 39 | ]
|
41 | 40 | },
|
42 | 41 | {
|
|
60 | 59 | "metadata": {},
|
61 | 60 | "outputs": [],
|
62 | 61 | "source": [
|
63 |
| - "from langchain.schema import AIMessage, HumanMessage\n", |
64 | 62 | "import gradio as gr\n",
|
65 |
| - "from langchain.llms.octoai_endpoint import OctoAIEndpoint\n", |
| 63 | + "import os, openai\n", |
66 | 64 | "\n",
|
67 |
| - "llama2_13b = \"llama-2-13b-chat-fp16\"\n", |
68 |
| - "\n", |
69 |
| - "llm = OctoAIEndpoint(\n", |
70 |
| - " endpoint_url=\"https://text.octoai.run/v1/chat/completions\",\n", |
71 |
| - " model_kwargs={\n", |
72 |
| - " \"model\": llama2_13b,\n", |
73 |
| - " \"messages\": [\n", |
74 |
| - " {\n", |
75 |
| - " \"role\": \"system\",\n", |
76 |
| - " \"content\": \"You are a helpful, respectful and honest assistant.\"\n", |
77 |
| - " }\n", |
78 |
| - " ],\n", |
79 |
| - " \"max_tokens\": 500,\n", |
80 |
| - " \"top_p\": 1,\n", |
81 |
| - " \"temperature\": 0.01\n", |
82 |
| - " },\n", |
| 65 | + "# Init OctoAI client\n", |
| 66 | + "client = openai.OpenAI(\n", |
| 67 | + " base_url=\"https://text.octoai.run/v1\",\n", |
| 68 | + " api_key=os.environ[\"OCTOAI_API_TOKEN\"]\n", |
83 | 69 | ")\n",
|
84 | 70 | "\n",
|
85 |
| - "\n", |
86 | 71 | "def predict(message, history):\n",
|
87 |
| - " history_langchain_format = []\n", |
88 |
| - " for human, ai in history:\n", |
89 |
| - " history_langchain_format.append(HumanMessage(content=human))\n", |
90 |
| - " history_langchain_format.append(AIMessage(content=ai))\n", |
91 |
| - " history_langchain_format.append(HumanMessage(content=message))\n", |
92 |
| - " llm_response = llm(message, history_langchain_format)\n", |
93 |
| - " return llm_response.content\n", |
| 72 | + " history_openai_format = []\n", |
| 73 | + " for human, assistant in history:\n", |
| 74 | + " history_openai_format.append({\"role\": \"user\", \"content\": human})\n", |
| 75 | + " history_openai_format.append({\"role\": \"assistant\", \"content\": assistant})\n", |
| 76 | + " history_openai_format.append({\"role\": \"user\", \"content\": message})\n", |
| 77 | + "\n", |
| 78 | + " response = client.chat.completions.create(\n", |
| 79 | + " model = 'meta-llama-3-70b-instruct',\n", |
| 80 | + " messages = history_openai_format,\n", |
| 81 | + " temperature = 0.0,\n", |
| 82 | + " stream = True\n", |
| 83 | + " )\n", |
| 84 | + "\n", |
| 85 | + " partial_message = \"\"\n", |
| 86 | + " for chunk in response:\n", |
| 87 | + " if chunk.choices[0].delta.content is not None:\n", |
| 88 | + " partial_message = partial_message + chunk.choices[0].delta.content\n", |
| 89 | + " yield partial_message\n", |
94 | 90 | "\n",
|
95 | 91 | "gr.ChatInterface(predict).launch()"
|
96 | 92 | ]
|
|
0 commit comments