Skip to content

Commit 2708f61

Browse files
committed
refreshing gradio app to use llama3, migrate to use OpenAI APIs
1 parent b203238 commit 2708f61

File tree

1 file changed

+27
-31
lines changed

1 file changed

+27
-31
lines changed

recipes/llama_api_providers/OctoAI_API_examples/Llama2_Gradio.ipynb

Lines changed: 27 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,14 @@
55
"id": "47a9adb3",
66
"metadata": {},
77
"source": [
8-
"## This demo app shows how to query Llama 2 using the Gradio UI.\n",
8+
"## This demo app shows how to query Llama 3 using the Gradio UI.\n",
99
"\n",
1010
"Since we are using OctoAI in this example, you'll need to obtain an OctoAI token:\n",
1111
"\n",
1212
"- You will need to first sign into [OctoAI](https://octoai.cloud/) with your Github or Google account\n",
1313
"- Then create a free API token [here](https://octo.ai/docs/getting-started/how-to-create-an-octoai-access-token) that you can use for a while (a month or $10 in OctoAI credits, whichever one runs out first)\n",
1414
"\n",
15-
"**Note** After the free trial ends, you will need to enter billing info to continue to use Llama2 hosted on OctoAI.\n",
15+
"**Note** After the free trial ends, you will need to enter billing info to continue to use Llama 3 hosted on OctoAI.\n",
1616
"\n",
1717
"To run this example:\n",
1818
"- Run the notebook\n",
@@ -22,8 +22,7 @@
2222
"In the notebook or a browser with URL http://127.0.0.1:7860 you should see a UI with your answer.\n",
2323
"\n",
2424
"Let's start by installing the necessary packages:\n",
25-
"- langchain provides necessary RAG tools for this demo\n",
26-
"- octoai-sdk allows us to use OctoAI Llama 2 endpoint\n",
25+
"- openai allows us to use the OpenAI-compatible APIs to talk to the OctoAI endpoint\n",
2726
"- gradio is used for the UI elements\n",
2827
"\n",
2928
"And setting up the OctoAI token."
@@ -36,7 +35,7 @@
3635
"metadata": {},
3736
"outputs": [],
3837
"source": [
39-
"!pip install langchain octoai-sdk gradio"
38+
"!pip install openai gradio"
4039
]
4140
},
4241
{
@@ -60,37 +59,34 @@
6059
"metadata": {},
6160
"outputs": [],
6261
"source": [
63-
"from langchain.schema import AIMessage, HumanMessage\n",
6462
"import gradio as gr\n",
65-
"from langchain.llms.octoai_endpoint import OctoAIEndpoint\n",
63+
"import openai\n",
6664
"\n",
67-
"llama2_13b = \"llama-2-13b-chat-fp16\"\n",
68-
"\n",
69-
"llm = OctoAIEndpoint(\n",
70-
" endpoint_url=\"https://text.octoai.run/v1/chat/completions\",\n",
71-
" model_kwargs={\n",
72-
" \"model\": llama2_13b,\n",
73-
" \"messages\": [\n",
74-
" {\n",
75-
" \"role\": \"system\",\n",
76-
" \"content\": \"You are a helpful, respectful and honest assistant.\"\n",
77-
" }\n",
78-
" ],\n",
79-
" \"max_tokens\": 500,\n",
80-
" \"top_p\": 1,\n",
81-
" \"temperature\": 0.01\n",
82-
" },\n",
65+
"# Init OctoAI client\n",
66+
"client = openai.OpenAI(\n",
67+
" base_url=\"https://text.octoai.run/v1\",\n",
68+
" api_key=os.environ[\"OCTOAI_API_TOKEN\"]\n",
8369
")\n",
8470
"\n",
85-
"\n",
8671
"def predict(message, history):\n",
87-
" history_langchain_format = []\n",
88-
" for human, ai in history:\n",
89-
" history_langchain_format.append(HumanMessage(content=human))\n",
90-
" history_langchain_format.append(AIMessage(content=ai))\n",
91-
" history_langchain_format.append(HumanMessage(content=message))\n",
92-
" llm_response = llm(message, history_langchain_format)\n",
93-
" return llm_response.content\n",
72+
" history_openai_format = []\n",
73+
" for human, assistant in history:\n",
74+
" history_openai_format.append({\"role\": \"user\", \"content\": human})\n",
75+
" history_openai_format.append({\"role\": \"assistant\", \"content\": assistant})\n",
76+
" history_openai_format.append({\"role\": \"user\", \"content\": message})\n",
77+
"\n",
78+
" response = client.chat.completions.create(\n",
79+
" model = 'meta-llama-3-70b-instruct',\n",
80+
" messages = history_openai_format,\n",
81+
" temperature = 0.0,\n",
82+
" stream = True\n",
83+
" )\n",
84+
"\n",
85+
" partial_message = \"\"\n",
86+
" for chunk in response:\n",
87+
" if chunk.choices[0].delta.content is not None:\n",
88+
" partial_message = partial_message + chunk.choices[0].delta.content\n",
89+
" yield partial_message\n",
9490
"\n",
9591
"gr.ChatInterface(predict).launch()"
9692
]

0 commit comments

Comments (0)