Merge pull request #13 from Robert-Ziegltrum/geo_visualization_and_input

pbv0 · web-flow · commit 509640d7a1a8 · 2025-11-12T21:08:10.000+01:00
Adding Geo input and visualization
diff --git a/docs/docs/streamlit/visualizations/_category_.json b/docs/docs/streamlit/visualizations/_category_.json
@@ -0,0 +1,9 @@
+{
+  "label": "Visualizations",
+  "position": 5,
+  "link": {
+    "type": "generated-index",
+    "description": "Display data and collect user input using charts and maps."
+  }
+}
+
diff --git a/docs/docs/streamlit/visualizations/visualizations_charts.mdx b/docs/docs/streamlit/visualizations/visualizations_charts.mdx
@@ -0,0 +1,131 @@
+---
+sidebar_position: 1
+---
+
+# Charts
+
+Use this recipe to visualize data using Streamlit's built-in chart components: area charts, line charts, and bar charts. This example demonstrates loading data from a Unity Catalog table and creating various business insights through different chart visualizations.
+
+## Code snippet
+
+### Load data from a table
+
+```python title="app.py"
+import streamlit as st
+from databricks import sql
+from databricks.sdk.core import Config
+from databricks.sdk import WorkspaceClient
+import pandas as pd
+
+cfg = Config()
+w = WorkspaceClient()
+
+# List available SQL warehouses
+warehouses = w.warehouses.list()
+warehouse_paths = {wh.name: wh.odbc_params.path for wh in warehouses}
+
+# Connect to SQL warehouse
+@st.cache_resource
+def get_connection(http_path):
+    return sql.connect(
+        server_hostname=cfg.host,
+        http_path=http_path,
+        credentials_provider=lambda: cfg.authenticate,
+    )
+
+# Read table
+def read_table(table_name, conn):
+    with conn.cursor() as cursor:
+        cursor.execute(f"SELECT * FROM {table_name} LIMIT 1000")
+        return cursor.fetchall_arrow().to_pandas()
+
+# Get data
+warehouse_name = "your_warehouse_name"
+table_name = "samples.nyctaxi.trips"
+
+http_path = warehouse_paths[warehouse_name]
+conn = get_connection(http_path)
+df = read_table(table_name, conn)
+
+# Process datetime columns
+df["tpep_pickup_datetime"] = pd.to_datetime(df["tpep_pickup_datetime"])
+df["tpep_dropoff_datetime"] = pd.to_datetime(df["tpep_dropoff_datetime"])
+df["pickup_hour"] = df["tpep_pickup_datetime"].dt.hour
+df["trip_duration_minutes"] = (df["tpep_dropoff_datetime"] - df["tpep_pickup_datetime"]).dt.total_seconds() / 60
+```
+
+### Demand analysis: Trips by hour
+
+```python title="app.py"
+import streamlit as st
+
+# Count trips by hour to understand demand patterns
+hourly_demand = df["pickup_hour"].value_counts().sort_index()
+st.bar_chart(hourly_demand)
+
+peak_hour = hourly_demand.idxmax()
+st.info(f"Peak demand hour: {peak_hour}:00 with {hourly_demand.max()} trips")
+```
+
+### Revenue analysis: Average fare by hour
+
+```python title="app.py"
+import streamlit as st
+
+# Analyze when fares are highest
+avg_fare_by_hour = df.groupby("pickup_hour")["fare_amount"].mean()
+st.line_chart(avg_fare_by_hour)
+
+best_hour = avg_fare_by_hour.idxmax()
+st.success(f"Best earning hour: {best_hour}:00")
+```
+
+### Location analysis: Top pickup zones
+
+```python title="app.py"
+import streamlit as st
+
+# Identify high-demand pickup locations
+top_pickups = df["pickup_zip"].value_counts().head(15)
+st.bar_chart(top_pickups)
+```
+
+### Cumulative revenue over time
+
+```python title="app.py"
+import streamlit as st
+
+# Track total revenue accumulation
+revenue_df = df.set_index("tpep_pickup_datetime")[["fare_amount"]].sort_index()
+revenue_df["cumulative_revenue"] = revenue_df["fare_amount"].cumsum()
+st.area_chart(revenue_df["cumulative_revenue"])
+```
+
+## Resources
+
+- [SQL warehouse](https://docs.databricks.com/aws/en/compute/sql-warehouse/)
+- [Unity Catalog table](https://docs.databricks.com/aws/en/tables/)
+
+## Permissions
+
+Your [app service principal](https://docs.databricks.com/aws/en/dev-tools/databricks-apps/#how-does-databricks-apps-manage-authorization) needs the following permissions:
+
+- `CAN USE` on the SQL warehouse
+- `SELECT` on the Unity Catalog table
+
+See [Unity Catalog privileges and securable objects](https://docs.databricks.com/aws/en/data-governance/unity-catalog/manage-privileges/privileges) for more information.
+
+## Dependencies
+
+- [Streamlit](https://pypi.org/project/streamlit/) - `streamlit`
+- [Databricks SDK](https://pypi.org/project/databricks-sdk/) - `databricks-sdk`
+- [Databricks SQL Connector](https://pypi.org/project/databricks-sql-connector/) - `databricks-sql-connector`
+- [Pandas](https://pypi.org/project/pandas/) - `pandas`
+
+```python title="requirements.txt"
+streamlit
+databricks-sdk
+databricks-sql-connector
+pandas
+```
+
diff --git a/docs/docs/streamlit/visualizations/visualizations_map.mdx b/docs/docs/streamlit/visualizations/visualizations_map.mdx
@@ -0,0 +1,112 @@
+---
+sidebar_position: 2
+---
+
+# Map display and interaction
+
+Use this recipe to display geographic data on a map and collect geographic input from users through interactive map drawing. You can load location data from Unity Catalog tables or use the drawing tools to capture user-drawn points, polygons, and geofences.
+
+## Code snippet
+
+### Display geo data from a table
+
+```python title="app.py"
+import streamlit as st
+from databricks import sql
+from databricks.sdk.core import Config
+from databricks.sdk import WorkspaceClient
+import pandas as pd
+
+cfg = Config()
+w = WorkspaceClient()
+
+# List available SQL warehouses
+warehouses = w.warehouses.list()
+warehouse_paths = {wh.name: wh.odbc_params.path for wh in warehouses}
+
+# Connect to SQL warehouse
+def get_connection(http_path):
+    return sql.connect(
+        server_hostname=cfg.host,
+        http_path=http_path,
+        credentials_provider=lambda: cfg.authenticate,
+    )
+
+# Read table
+def read_table(table_name, conn):
+    with conn.cursor() as cursor:
+        cursor.execute(f"SELECT * FROM {table_name}")
+        return cursor.fetchall_arrow().to_pandas()
+
+# Get data and display on map
+warehouse_name = "your_warehouse_name"
+table_name = "samples.accuweather.forecast_daily_calendar_metric"
+
+http_path = warehouse_paths[warehouse_name]
+conn = get_connection(http_path)
+df = read_table(table_name, conn)
+
+# Display map with latitude/longitude columns
+st.map(df, latitude="latitude", longitude="longitude")
+```
+
+### Collect user geo input
+
+```python title="app.py"
+import streamlit as st
+from streamlit_folium import st_folium
+import folium
+from folium.plugins import Draw
+
+# Create a map centered on a location
+m = folium.Map(location=[37.7749, -122.4194], zoom_start=13)
+
+# Enable drawing tools (set True for the tools you want to enable)
+draw = Draw(
+    draw_options={
+        "marker": True,      # For collecting points
+        "polygon": True,     # For collecting geofences/polygons
+        "polyline": True,    # For collecting polylines
+        "rectangle": True,   # For collecting rectangles
+        "circle": True,      # For collecting circles
+        "circlemarker": False,
+    },
+    edit_options={"edit": True},
+)
+draw.add_to(m)
+output = st_folium(m, width=700, height=500)
+
+# Access the drawn geometry
+if output["last_active_drawing"] and "geometry" in output["last_active_drawing"]:
+    geometry = output["last_active_drawing"]["geometry"]
+    st.json(geometry)
+```
+
+## Resources
+
+- [SQL warehouse](https://docs.databricks.com/aws/en/compute/sql-warehouse/) _(optional, only for reading table data)_
+- [Unity Catalog table](https://docs.databricks.com/aws/en/tables/) _(optional, only for reading table data)_
+
+## Permissions
+
+Your [app service principal](https://docs.databricks.com/aws/en/dev-tools/databricks-apps/#how-does-databricks-apps-manage-authorization) needs the following permissions:
+
+- `CAN USE` on the SQL warehouse _(only required if reading data from tables)_
+- `SELECT` on the Unity Catalog table _(only required if reading data from tables)_
+
+See [Unity Catalog privileges and securable objects](https://docs.databricks.com/aws/en/data-governance/unity-catalog/manage-privileges/privileges) for more information.
+
+## Dependencies
+
+- [Streamlit](https://pypi.org/project/streamlit/) - `streamlit`
+- [Streamlit Folium](https://pypi.org/project/streamlit-folium/) - `streamlit-folium`
+- [Databricks SDK](https://pypi.org/project/databricks-sdk/) - `databricks-sdk` _(for table data)_
+- [Databricks SQL Connector](https://pypi.org/project/databricks-sql-connector/) - `databricks-sql-connector` _(for table data)_
+
+```python title="requirements.txt"
+streamlit
+streamlit-folium
+databricks-sdk
+databricks-sql-connector
+```
+
diff --git a/streamlit/requirements.txt b/streamlit/requirements.txt
@@ -2,6 +2,7 @@ databricks-connect==16.0.0
 databricks-sdk[openai]==0.60.0
 databricks-sql-connector==4.0.0
 pandas==2.2.3
-streamlit==1.41.1
 psycopg[binary]==3.2.9
 psycopg-pool==3.2.6
+streamlit==1.41.1
+streamlit-folium==0.25.3
diff --git a/streamlit/view_groups.py b/streamlit/view_groups.py
@@ -152,6 +152,22 @@
         ],
     },
     {
+        "title": "Visualizations",
+        "views": [
+            {
+                "label": "Charts",
+                "help": "Visualize data using Streamlit's built-in chart components.",
+                "page": "views/visualizations_charts.py",
+                "icon": ":material/bar_chart:",
+            },
+            {
+                "label": "Map display and interaction",
+                "help": "Display geo information on a map and allow users to draw on the map.",
+                "page": "views/visualizations_map.py",
+                "icon": ":material/globe:",
+            },
+        ],
+    },
     "title": "External services",
     "views": [
         {
diff --git a/streamlit/views/visualizations_charts.py b/streamlit/views/visualizations_charts.py
diff --git a/streamlit/views/visualizations_map.py b/streamlit/views/visualizations_map.py