Skip to content

Commit a319477

Browse files
authored
Merge pull request #886 from feloy/feat/chatbot-llama-stack-recipe
feat: add chatbot recipe using llama stack
2 parents dcd0bfb + 1fa23c7 commit a319477

File tree

5 files changed

+128
-0
lines changed

5 files changed

+128
-0
lines changed
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Chat Application
2+
3+
This recipe helps developers start building their own custom LLM enabled chat applications.
4+
5+
There are a few options today for local Model Serving, but this recipe will use [`Llama Stack`](https://llama-stack.readthedocs.io/en/latest/).
6+
7+
The AI Application will connect to the Model Service via its API. The recipe relies on [Llama Stack Client Python SDK](https://github.com/meta-llama/llama-stack-client-python) to simplify communication with the Model Service and uses [Streamlit](https://streamlit.io/) for the UI layer.
8+
9+
## Try the Chat Application
10+
11+
The [Podman Desktop](https://podman-desktop.io) [AI Lab Extension](https://github.com/containers/podman-desktop-extension-ai-lab) includes this recipe among others. To try it out, open `Recipes Catalog` -> `Chatbot using Llama Stack` and follow the instructions to start the application.
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# AI Lab recipe definition: Streamlit chatbot frontend backed by Llama Stack.
version: v1.0
application:
  # "language" marks this recipe as a language-model (LLM) application.
  type: language
  name: ChatBot_Streamlit_LlamaStack
  description: Chat with a model service in a web frontend.
  containers:
    # Streamlit UI container; connects to the model service over HTTP.
    - name: streamlit-llamastack-chat-app
      contextdir: app
      containerfile: Containerfile
      # Multi-arch build targets.
      arch:
        - arm64
        - amd64
      ports:
        - 8501  # Streamlit's default serving port (EXPOSEd in the Containerfile)
      image: quay.io/ai-lab/chatbot-llama-stack:latest
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Container image for the Streamlit + Llama Stack chatbot UI.
FROM registry.access.redhat.com/ubi9/python-311:1-77.1726664316

WORKDIR /chat

# Copy the dependency manifest first so dependency installation is cached
# independently of application-code changes.
COPY requirements.txt .

# One RUN keeps dependencies in a single layer; --no-cache-dir avoids
# shipping pip's download cache in the image. The relative path resolves
# against WORKDIR (/chat), matching the COPY above.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir --upgrade -r requirements.txt

COPY chatbot_ui.py .

# Streamlit serves on 8501 by default.
EXPOSE 8501
ENTRYPOINT [ "streamlit", "run", "chatbot_ui.py" ]
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
from llama_stack_client import LlamaStackClient
2+
from llama_stack_client.types.shared_params.user_message import UserMessage
3+
4+
import streamlit as st
5+
import requests
6+
import time
7+
import os
8+
9+
# Base URL of the Llama Stack model service; override via MODEL_ENDPOINT
# when the service is not on localhost (e.g. inside a pod/compose network).
model_service_base = os.getenv("MODEL_ENDPOINT",
                               "http://localhost:8001")
# OpenAI-compatible v1 prefix, used only for the raw readiness probe below.
model_service = f"{model_service_base}/v1"
# Extra keyword arguments forwarded to requests.get in the readiness probe
# (empty by default; a hook for e.g. timeouts or TLS options).
request_kwargs = {}

# SDK client used for model listing and chat completions.
client = LlamaStackClient(
    base_url=model_service_base,
)
17+
18+
@st.cache_resource(show_spinner=False)
def checking_model_service():
    """Block until the model service answers on its /v1/models endpoint.

    Polls once per second. Cached with st.cache_resource so the wait is
    paid only once per Streamlit server process, not on every rerun.

    Returns:
        True once the service responded with HTTP 200.
    """
    start = time.time()
    print("Checking Model Service Availability...")
    ready = False
    while not ready:
        try:
            probe = requests.get(f'{model_service}/models', **request_kwargs)
            if probe.status_code == 200:
                ready = True
                # Skip the final sleep once the service is reachable.
                break
        except requests.exceptions.RequestException as inst:
            # Service not up yet (connection refused, DNS, timeout...);
            # log and keep polling rather than crashing the UI.
            print(inst)
        time.sleep(1)
    print("Model Service Available")
    print(f"{time.time()-start} seconds")
    # The original returned the builtin `id` by mistake; return an explicit
    # truthy marker so the cached resource value is meaningful.
    return True
35+
36+
# Show a spinner while the (cached) availability check blocks; on later
# reruns the cached result returns immediately.
with st.spinner("Checking Model Service Availability..."):
    checking_model_service()
38+
39+
def enableInput():
    """Re-enable the chat input box (clears the disabled flag)."""
    st.session_state.input_disabled = False
41+
42+
def disableInput():
    """Disable the chat input box while a response is being generated."""
    st.session_state.input_disabled = True
44+
45+
st.title("💬 Chatbot")

# Seed the conversation history on the first load of this session.
if "messages" not in st.session_state:
    st.session_state["messages"] = [{"role": "assistant",
                                     "content": "How can I help you?"}]
if "input_disabled" not in st.session_state:
    enableInput()

with st.sidebar:
    st.header("Select a model")
    # Offer only LLM-type models; the stack may also register other
    # model types (e.g. embeddings) that cannot chat.
    available_models = client.models.list()
    available_models = [model.identifier for model in available_models if model.api_model_type == "llm"]
    selected_model = st.selectbox(
        "Choose a model",
        available_models,
        index=0,
    )

# Replay the stored conversation on every Streamlit rerun.
for msg in st.session_state.messages:
    st.chat_message(msg["role"]).write(msg["content"])

# on_submit disables input so the user cannot send while a reply streams.
if prompt := st.chat_input(disabled=st.session_state["input_disabled"],on_submit=disableInput):
    message_placeholder = st.empty()
    full_response = ""

    st.session_state.messages.append({"role": "user", "content": prompt})

    if selected_model:
        # Stream the completion so tokens render incrementally.
        response = client.inference.chat_completion(
            messages=[
                UserMessage(
                    content=prompt,
                    role="user",
                ),
            ],
            model_id=selected_model,
            stream=True
        )
        for chunk in response:
            # Only "progress" events with text deltas carry content.
            if chunk.event is not None and chunk.event.event_type == "progress" and chunk.event.delta.type == "text":
                full_response += chunk.event.delta.text
                # Trailing block cursor signals streaming in progress.
                message_placeholder.markdown(full_response + "▌")
        message_placeholder.markdown(full_response)

        st.session_state.messages.append({"role": "assistant", "content": full_response})
    # NOTE(review): original indentation was lost in this copy; the
    # re-enable + rerun are placed at the prompt level so input recovers
    # even when no model is selected — confirm against the upstream file.
    enableInput()
    st.rerun()
91+
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
streamlit==1.34.0
2+
llama_stack_client==0.2.10
3+
fire

0 commit comments

Comments
 (0)