Skip to content

Commit a319477

Browse files
authored
Merge pull request #886 from feloy/feat/chatbot-llama-stack-recipe
feat: add chatbot recipe using llama stack
2 parents dcd0bfb + 1fa23c7 commit a319477

File tree

5 files changed

+128
-0
lines changed

5 files changed

+128
-0
lines changed
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Chat Application
2+
3+
This recipe helps developers start building their own custom LLM enabled chat applications.
4+
5+
There are a few options today for local Model Serving, but this recipe will use [`Llama Stack`](https://llama-stack.readthedocs.io/en/latest/).
6+
7+
The AI Application will connect to the Model Service via its API. The recipe relies on [Llama Stack Client Python SDK](https://github.com/meta-llama/llama-stack-client-python) to simplify communication with the Model Service and uses [Streamlit](https://streamlit.io/) for the UI layer.
8+
9+
## Try the Chat Application
10+
11+
The [Podman Desktop](https://podman-desktop.io) [AI Lab Extension](https://github.com/containers/podman-desktop-extension-ai-lab) includes this recipe among others. To try it out, open `Recipes Catalog` -> `Chatbot using Llama Stack` and follow the instructions to start the application.
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# AI Lab recipe definition: Streamlit chatbot frontend backed by Llama Stack.
version: v1.0
application:
  # "language" marks this recipe as a language-model (LLM) application.
  type: language
  name: ChatBot_Streamlit_LlamaStack
  description: Chat with a model service in a web frontend.
  containers:
    # Streamlit UI container; connects to the model service over HTTP.
    - name: streamlit-llamastack-chat-app
      contextdir: app
      containerfile: Containerfile
      # Multi-arch build targets.
      arch:
        - arm64
        - amd64
      ports:
        - 8501  # Streamlit's default serving port (EXPOSEd in the Containerfile)
      image: quay.io/ai-lab/chatbot-llama-stack:latest
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Container image for the Streamlit + Llama Stack chatbot UI.
FROM registry.access.redhat.com/ubi9/python-311:1-77.1726664316

WORKDIR /chat

# Copy the dependency manifest first so dependency installation is cached
# independently of application-code changes.
COPY requirements.txt .

# One RUN keeps dependencies in a single layer; --no-cache-dir avoids
# shipping pip's download cache in the image. The relative path resolves
# against WORKDIR (/chat), matching the COPY above.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir --upgrade -r requirements.txt

COPY chatbot_ui.py .

# Streamlit serves on 8501 by default.
EXPOSE 8501
ENTRYPOINT [ "streamlit", "run", "chatbot_ui.py" ]
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
from llama_stack_client import LlamaStackClient
2+
from llama_stack_client.types.shared_params.user_message import UserMessage
3+
4+
import streamlit as st
5+
import requests
6+
import time
7+
import os
8+
9+
# Base URL of the Llama Stack model service; override via MODEL_ENDPOINT
# when the service is not on localhost (e.g. inside a pod/compose network).
model_service_base = os.getenv("MODEL_ENDPOINT",
                               "http://localhost:8001")
# OpenAI-compatible v1 prefix, used only for the raw readiness probe below.
model_service = f"{model_service_base}/v1"
# Extra keyword arguments forwarded to requests.get in the readiness probe
# (empty by default; a hook for e.g. timeouts or TLS options).
request_kwargs = {}

# SDK client used for model listing and chat completions.
client = LlamaStackClient(
    base_url=model_service_base,
)
17+
18+
@st.cache_resource(show_spinner=False)
def checking_model_service():
    """Block until the model service answers on its /v1/models endpoint.

    Polls once per second. Cached with st.cache_resource so the wait is
    paid only once per Streamlit server process, not on every rerun.

    Returns:
        True once the service responded with HTTP 200.
    """
    start = time.time()
    print("Checking Model Service Availability...")
    ready = False
    while not ready:
        try:
            probe = requests.get(f'{model_service}/models', **request_kwargs)
            if probe.status_code == 200:
                ready = True
                # Skip the final sleep once the service is reachable.
                break
        except requests.exceptions.RequestException as inst:
            # Service not up yet (connection refused, DNS, timeout...);
            # log and keep polling rather than crashing the UI.
            print(inst)
        time.sleep(1)
    print("Model Service Available")
    print(f"{time.time()-start} seconds")
    # The original returned the builtin `id` by mistake; return an explicit
    # truthy marker so the cached resource value is meaningful.
    return True
35+
36+
# Show a spinner while the (cached) availability check blocks; on later
# reruns the cached result returns immediately.
with st.spinner("Checking Model Service Availability..."):
    checking_model_service()
38+
39+
def enableInput():
    """Re-enable the chat input box (clears the disabled flag)."""
    st.session_state.input_disabled = False
41+
42+
def disableInput():
    """Disable the chat input box while a response is being generated."""
    st.session_state.input_disabled = True
44+
45+
st.title("💬 Chatbot")

# Seed the conversation history on the first load of this session.
if "messages" not in st.session_state:
    st.session_state["messages"] = [{"role": "assistant",
                                     "content": "How can I help you?"}]
if "input_disabled" not in st.session_state:
    enableInput()

with st.sidebar:
    st.header("Select a model")
    # Offer only LLM-type models; the stack may also register other
    # model types (e.g. embeddings) that cannot chat.
    available_models = client.models.list()
    available_models = [model.identifier for model in available_models if model.api_model_type == "llm"]
    selected_model = st.selectbox(
        "Choose a model",
        available_models,
        index=0,
    )

# Replay the stored conversation on every Streamlit rerun.
for msg in st.session_state.messages:
    st.chat_message(msg["role"]).write(msg["content"])

# on_submit disables input so the user cannot send while a reply streams.
if prompt := st.chat_input(disabled=st.session_state["input_disabled"],on_submit=disableInput):
    message_placeholder = st.empty()
    full_response = ""

    st.session_state.messages.append({"role": "user", "content": prompt})

    if selected_model:
        # Stream the completion so tokens render incrementally.
        response = client.inference.chat_completion(
            messages=[
                UserMessage(
                    content=prompt,
                    role="user",
                ),
            ],
            model_id=selected_model,
            stream=True
        )
        for chunk in response:
            # Only "progress" events with text deltas carry content.
            if chunk.event is not None and chunk.event.event_type == "progress" and chunk.event.delta.type == "text":
                full_response += chunk.event.delta.text
                # Trailing block cursor signals streaming in progress.
                message_placeholder.markdown(full_response + "▌")
        message_placeholder.markdown(full_response)

        st.session_state.messages.append({"role": "assistant", "content": full_response})
    # NOTE(review): original indentation was lost in this copy; the
    # re-enable + rerun are placed at the prompt level so input recovers
    # even when no model is selected — confirm against the upstream file.
    enableInput()
    st.rerun()
91+
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
streamlit==1.34.0
2+
llama_stack_client==0.2.10
3+
fire

0 commit comments

Comments
 (0)