
Commit 1405131

antonbricks committed
Refactor multi-modal Streamlit inference
1 parent 24622cb commit 1405131

File tree: 1 file changed (+94, -90 lines)

streamlit/views/ml_serving_invoke_mllm.py

Lines changed: 94 additions & 90 deletions
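At its core, the refactor swaps the dataframe-records call to `w.serving_endpoints.query` for the workspace's OpenAI-compatible client. Below is a minimal sketch of the new call shape outside Streamlit; the endpoint name and image path are placeholders, not part of this commit:

    import base64
    from databricks.sdk import WorkspaceClient

    w = WorkspaceClient()
    model_client = w.serving_endpoints.get_open_ai_client()

    # Placeholder: any JPEG on disk, base64-encoded for the data URL
    with open("example.jpg", "rb") as f:
        image_b64 = base64.b64encode(f.read()).decode("utf-8")

    completion = model_client.chat.completions.create(
        model="databricks-claude-3-7-sonnet",  # placeholder endpoint name
        messages=[{
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe the image"},
                {"type": "image_url",
                 "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}},
            ],
        }],
    )
    print(completion.choices[0].message.content)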
@@ -6,72 +6,89 @@
 from databricks.sdk import WorkspaceClient
 
 w = WorkspaceClient()
-
+model_client = w.serving_endpoints.get_open_ai_client()
 
 st.header(body="AI / ML", divider=True)
 st.subheader("Invoke a multi-modal LLM")
-st.write(
-    "Upload an image and provide a prompt for multi-modal inference, e.g., using Llama 3.2."
+st.markdown(
+    "Upload an image and provide a prompt for multi-modal inference, e.g., with [Claude Sonnet 3.7](https://www.databricks.com/blog/anthropic-claude-37-sonnet-now-natively-available-databricks)."
 )
 
-tab1, tab2, tab3 = st.tabs(["**Try it**", "**Code snippet**", "**Requirements**"])
+tab1, tab2, tab3 = st.tabs(
+    ["**Try it**", "**Code snippet**", "**Requirements**"])
 
 
-def pillow_image_to_base64_string(img):
+def pillow_image_to_base64_string(image):
     """Convert a Pillow image to a base64-encoded string for API transmission."""
     buffered = io.BytesIO()
-    img.convert("RGB").save(buffered, format="JPEG")
+    image.convert("RGB").save(buffered, format="JPEG")
 
     return base64.b64encode(buffered.getvalue()).decode("utf-8")
 
 
-def chat_with_mllm(endpoint_name, prompt, image, messages=None) -> tuple[str, Dict]:
+def chat_with_mllm(endpoint_name,
+                   prompt,
+                   image,
+                   messages=None) -> tuple[str, Dict]:
     """
     Chat with a multi-modal LLM using Mosaic AI Model Serving.
 
-    This function sends the prompt and image(s) to a deployed Llama 3.2 endpoint
+    This function sends the prompt and image(s) to, e.g., a Claude Sonnet 3.7 endpoint
     using Databricks SDK.
     """
-
-    request_data = {
-        "user_query": prompt,
-        "image": pillow_image_to_base64_string(image)
+
+    image_data = pillow_image_to_base64_string(image)
+    messages = messages or []
+
+    current_user_message = {
+        "role":
+            "user",
+        "content": [
+            {
+                "type": "text",
+                "text": prompt
+            },
+            {
+                "type": "image_url",
+                "image_url": {
+                    "url": f"data:image/jpeg;base64,{image_data}"
+                },
+            },
+        ],
     }
-
-    response = w.serving_endpoints.query(
-        name=endpoint_name,
-        dataframe_records=[request_data]
+    messages.append(current_user_message)
+
+    completion = model_client.chat.completions.create(
+        model=endpoint_name,
+        messages=messages,
     )
-
-    generated_text = ""
-    if response.get("predictions"):
-        generated_text = response.predictions[0]
-
-    # Update conversation history
-    if not messages:
-        messages = [{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": prompt}]}]
-    else:
-        messages.append({"role": "user", "content": [{"type": "text", "text": prompt}]})
-
-    messages.append({"role": "assistant", "content": [{"type": "text", "text": generated_text}]})
-
-    return generated_text, messages
+    completion_text = completion.choices[0].message.content
+
+    messages.append({
+        "role": "assistant",
+        "content": [{
+            "type": "text",
+            "text": completion_text
+        }]
+    })
+
+    return completion_text, messages
 
 
 with tab1:
     endpoints = w.serving_endpoints.list()
     endpoint_names = [endpoint.name for endpoint in endpoints]
 
-    selected_model = st.selectbox(
-        "Select a model served by Model Serving", endpoint_names
-    )
+    selected_model = st.selectbox("Select a multi-modal Model Serving endpoint",
+                                  endpoint_names)
 
-    uploaded_file = st.file_uploader("Select an image (JPG, JPEG, or PNG)", type=["jpg", "jpeg", "png"])
+    uploaded_file = st.file_uploader("Select an image (JPG, JPEG, or PNG)",
+                                     type=["jpg", "jpeg", "png"])
 
     prompt = st.text_area(
-        "Enter your prompt:",
+        "Enter your prompt:",
         placeholder="Describe or ask something about the image...",
-        value="Describe the images as an alternative text",
+        value="Describe the image(s) as an alternative text",
     )
 
     if uploaded_file:
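Since the refactored `chat_with_mllm` both accepts and returns the running `messages` list, it can also be reused for multi-turn follow-ups. A small usage sketch under assumed inputs (the local image path and endpoint name are hypothetical); note the current implementation re-encodes and re-sends the image on every call:

    from PIL import Image

    image = Image.open("photo.jpg")  # hypothetical local image

    # First turn: prompt plus image
    answer, history = chat_with_mllm(
        endpoint_name="databricks-claude-3-7-sonnet",  # placeholder endpoint
        prompt="Describe the image(s) as an alternative text",
        image=image,
    )

    # Follow-up turn: pass the returned history so the model keeps context
    follow_up, history = chat_with_mllm(
        endpoint_name="databricks-claude-3-7-sonnet",
        prompt="Shorten that description to one sentence.",
        image=image,
        messages=history,
    )
    print(follow_up)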
@@ -81,78 +98,65 @@ def chat_with_mllm(endpoint_name, prompt, image, messages=None) -> tuple[str, Dict]:
     if st.button("Invoke LLM"):
         if uploaded_file:
             with st.spinner("Processing..."):
-                generated_text, conversation, _ = chat_with_mllm(
+                completion_text, _ = chat_with_mllm(
                     endpoint_name=selected_model,
                     prompt=prompt,
                     image=image,
                 )
-
-                st.write(generated_text)
+
+                st.write(completion_text)
         else:
             st.error("Please upload an image to proceed.")
 
-
 with tab2:
     st.code("""
-import streamlit as st
-from databricks.sdk import WorkspaceClient
-
-w = WorkspaceClient()
-
-openai_client = w.serving_endpoints.get_open_ai_client()
-
-EMBEDDING_MODEL_ENDPOINT_NAME = "databricks-gte-large-en"
-
-
-def get_embeddings(text):
-    try:
-        response = openai_client.embeddings.create(
-            model=EMBEDDING_MODEL_ENDPOINT_NAME, input=text
-        )
-        return response.data[0].embedding
-    except Exception as e:
-        st.text(f"Error generating embeddings: {e}")
+import io
+import base64
+import streamlit as st
+from PIL import Image
+from databricks.sdk import WorkspaceClient
 
+w = WorkspaceClient()
+model_client = w.serving_endpoints.get_open_ai_client()
 
-def run_vector_search(prompt: str) -> str:
-    prompt_vector = get_embeddings(prompt)
-    if prompt_vector is None or isinstance(prompt_vector, str):
-        return f"Failed to generate embeddings: {prompt_vector}"
 
-    columns_to_fetch = [col.strip() for col in columns.split(",") if col.strip()]
+def pillow_image_to_base64_string(image):
+    buffered = io.BytesIO()
+    image.convert("RGB").save(buffered, format="JPEG")
 
-    try:
-        query_result = w.vector_search_indexes.query_index(
-            index_name=index_name,
-            columns=columns_to_fetch,
-            query_vector=prompt_vector,
-            num_results=3,
-        )
-        return query_result.result.data_array
-    except Exception as e:
-        return f"Error during vector search: {e}"
+    return base64.b64encode(buffered.getvalue()).decode("utf-8")
 
 
-index_name = st.text_input(
-    label="Unity Catalog Vector search index:",
-    placeholder="catalog.schema.index-name",
+def chat_with_mllm(endpoint_name, prompt, image):
+    image_data = pillow_image_to_base64_string(image)
+    messages = [{
+        "role": "user",
+        "content": [
+            {"type": "text", "text": prompt},
+            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}},
+        ],
+    }]
+    completion = model_client.chat.completions.create(
+        model=endpoint_name,
+        messages=messages,
     )
 
-columns = st.text_input(
-    label="Columns to retrieve (comma-separated):",
-    placeholder="url, name",
-    help="Enter one or more column names present in the vector search index, separated by commas. E.g. id, text, url.",
-)
+    return completion.choices[0].message.content
 
-text_input = st.text_input(
-    label="Enter your search query:",
-    placeholder="What is Databricks?",
-    key="search_query_key",
-)
+# UI elements
+endpoints = w.serving_endpoints.list()
+endpoint_names = [endpoint.name for endpoint in endpoints]
 
-if st.button("Run vector search"):
-    result = run_vector_search(text_input)
-    st.write("Search results:")
+selected_model = st.selectbox("Select a model served by Model Serving", endpoint_names)
+uploaded_file = st.file_uploader("Select an image", type=["jpg", "jpeg", "png"])
+prompt = st.text_area("Enter your prompt:")
+
+if st.button("Invoke LLM"):
+    if uploaded_file:
+        image = Image.open(uploaded_file)
+        st.image(image, caption="Uploaded image")
+        with st.spinner("Processing..."):
+            result = chat_with_mllm(selected_model, prompt, image)
     st.write(result)
 """)
 

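The snippet above assumes the selected endpoint accepts `image_url` content blocks; an endpoint that does not will raise on the completion call. A hedged hardening sketch (not part of this commit) that reuses the snippet's `model_client`, `endpoint_name`, and `messages` names and surfaces the failure in the UI instead of crashing the app:

    try:
        completion = model_client.chat.completions.create(
            model=endpoint_name,
            messages=messages,
        )
        st.write(completion.choices[0].message.content)
    except Exception as e:
        # e.g. the endpoint is not multi-modal or is still scaling up
        st.error(f"Model invocation failed: {e}")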
@@ -167,7 +171,7 @@ def run_vector_search(prompt: str) -> str:
     with col2:
         st.markdown("""
             **Databricks resources**
-            * Model serving endpoint
+            * Multi-modal Model Serving endpoint
             """)
     with col3:
         st.markdown("""

0 comments